1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/if_ether.h>
49 #include <linux/aer.h>
50 #include <linux/prefetch.h>
51 #ifdef CONFIG_IGB_DCA
52 #include <linux/dca.h>
53 #endif
54 #include "igb.h"
55
56 #define MAJ 3
57 #define MIN 0
58 #define BUILD 6
59 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
60 __stringify(BUILD) "-k"
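/* With the MAJ/MIN/BUILD values above this expands to the version string "3.0.6-k". */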
61 char igb_driver_name[] = "igb";
62 char igb_driver_version[] = DRV_VERSION;
63 static const char igb_driver_string[] =
64 "Intel(R) Gigabit Ethernet Network Driver";
65 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66
67 static const struct e1000_info *igb_info_tbl[] = {
68 [board_82575] = &e1000_82575_info,
69 };
70
71 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
97 /* required last entry */
98 {0, }
99 };
100
101 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102
103 void igb_reset(struct igb_adapter *);
104 static int igb_setup_all_tx_resources(struct igb_adapter *);
105 static int igb_setup_all_rx_resources(struct igb_adapter *);
106 static void igb_free_all_tx_resources(struct igb_adapter *);
107 static void igb_free_all_rx_resources(struct igb_adapter *);
108 static void igb_setup_mrqc(struct igb_adapter *);
109 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
110 static void __devexit igb_remove(struct pci_dev *pdev);
111 static void igb_init_hw_timer(struct igb_adapter *adapter);
112 static int igb_sw_init(struct igb_adapter *);
113 static int igb_open(struct net_device *);
114 static int igb_close(struct net_device *);
115 static void igb_configure_tx(struct igb_adapter *);
116 static void igb_configure_rx(struct igb_adapter *);
117 static void igb_clean_all_tx_rings(struct igb_adapter *);
118 static void igb_clean_all_rx_rings(struct igb_adapter *);
119 static void igb_clean_tx_ring(struct igb_ring *);
120 static void igb_clean_rx_ring(struct igb_ring *);
121 static void igb_set_rx_mode(struct net_device *);
122 static void igb_update_phy_info(unsigned long);
123 static void igb_watchdog(unsigned long);
124 static void igb_watchdog_task(struct work_struct *);
125 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
126 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
127 struct rtnl_link_stats64 *stats);
128 static int igb_change_mtu(struct net_device *, int);
129 static int igb_set_mac(struct net_device *, void *);
130 static void igb_set_uta(struct igb_adapter *adapter);
131 static irqreturn_t igb_intr(int irq, void *);
132 static irqreturn_t igb_intr_msi(int irq, void *);
133 static irqreturn_t igb_msix_other(int irq, void *);
134 static irqreturn_t igb_msix_ring(int irq, void *);
135 #ifdef CONFIG_IGB_DCA
136 static void igb_update_dca(struct igb_q_vector *);
137 static void igb_setup_dca(struct igb_adapter *);
138 #endif /* CONFIG_IGB_DCA */
139 static bool igb_clean_tx_irq(struct igb_q_vector *);
140 static int igb_poll(struct napi_struct *, int);
141 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
142 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
143 static void igb_tx_timeout(struct net_device *);
144 static void igb_reset_task(struct work_struct *);
145 static void igb_vlan_mode(struct net_device *netdev, u32 features);
146 static void igb_vlan_rx_add_vid(struct net_device *, u16);
147 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
148 static void igb_restore_vlan(struct igb_adapter *);
149 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
150 static void igb_ping_all_vfs(struct igb_adapter *);
151 static void igb_msg_task(struct igb_adapter *);
152 static void igb_vmm_control(struct igb_adapter *);
153 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
154 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
155 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
156 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
157 int vf, u16 vlan, u8 qos);
158 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
159 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
160 struct ifla_vf_info *ivi);
161 static void igb_check_vf_rate_limit(struct igb_adapter *);
162
163 #ifdef CONFIG_PM
164 static int igb_suspend(struct pci_dev *, pm_message_t);
165 static int igb_resume(struct pci_dev *);
166 #endif
167 static void igb_shutdown(struct pci_dev *);
168 #ifdef CONFIG_IGB_DCA
169 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
170 static struct notifier_block dca_notifier = {
171 .notifier_call = igb_notify_dca,
172 .next = NULL,
173 .priority = 0
174 };
175 #endif
176 #ifdef CONFIG_NET_POLL_CONTROLLER
177 /* for netdump / net console */
178 static void igb_netpoll(struct net_device *);
179 #endif
180 #ifdef CONFIG_PCI_IOV
181 static unsigned int max_vfs = 0;
182 module_param(max_vfs, uint, 0);
183 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
184 "per physical function");
185 #endif /* CONFIG_PCI_IOV */
186
187 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
188 pci_channel_state_t);
189 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
190 static void igb_io_resume(struct pci_dev *);
191
192 static struct pci_error_handlers igb_err_handler = {
193 .error_detected = igb_io_error_detected,
194 .slot_reset = igb_io_slot_reset,
195 .resume = igb_io_resume,
196 };
197
198
199 static struct pci_driver igb_driver = {
200 .name = igb_driver_name,
201 .id_table = igb_pci_tbl,
202 .probe = igb_probe,
203 .remove = __devexit_p(igb_remove),
204 #ifdef CONFIG_PM
205 /* Power Management Hooks */
206 .suspend = igb_suspend,
207 .resume = igb_resume,
208 #endif
209 .shutdown = igb_shutdown,
210 .err_handler = &igb_err_handler
211 };
212
213 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
214 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
215 MODULE_LICENSE("GPL");
216 MODULE_VERSION(DRV_VERSION);
217
218 struct igb_reg_info {
219 u32 ofs;
220 char *name;
221 };
222
223 static const struct igb_reg_info igb_reg_info_tbl[] = {
224
225 /* General Registers */
226 {E1000_CTRL, "CTRL"},
227 {E1000_STATUS, "STATUS"},
228 {E1000_CTRL_EXT, "CTRL_EXT"},
229
230 /* Interrupt Registers */
231 {E1000_ICR, "ICR"},
232
233 /* RX Registers */
234 {E1000_RCTL, "RCTL"},
235 {E1000_RDLEN(0), "RDLEN"},
236 {E1000_RDH(0), "RDH"},
237 {E1000_RDT(0), "RDT"},
238 {E1000_RXDCTL(0), "RXDCTL"},
239 {E1000_RDBAL(0), "RDBAL"},
240 {E1000_RDBAH(0), "RDBAH"},
241
242 /* TX Registers */
243 {E1000_TCTL, "TCTL"},
244 {E1000_TDBAL(0), "TDBAL"},
245 {E1000_TDBAH(0), "TDBAH"},
246 {E1000_TDLEN(0), "TDLEN"},
247 {E1000_TDH(0), "TDH"},
248 {E1000_TDT(0), "TDT"},
249 {E1000_TXDCTL(0), "TXDCTL"},
250 {E1000_TDFH, "TDFH"},
251 {E1000_TDFT, "TDFT"},
252 {E1000_TDFHS, "TDFHS"},
253 {E1000_TDFPC, "TDFPC"},
254
255 /* List Terminator */
256 {}
257 };
258
259 /*
260 * igb_regdump - register printout routine
261 */
262 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263 {
264 int n = 0;
265 char rname[16];
266 u32 regs[8];
267
268 switch (reginfo->ofs) {
269 case E1000_RDLEN(0):
270 for (n = 0; n < 4; n++)
271 regs[n] = rd32(E1000_RDLEN(n));
272 break;
273 case E1000_RDH(0):
274 for (n = 0; n < 4; n++)
275 regs[n] = rd32(E1000_RDH(n));
276 break;
277 case E1000_RDT(0):
278 for (n = 0; n < 4; n++)
279 regs[n] = rd32(E1000_RDT(n));
280 break;
281 case E1000_RXDCTL(0):
282 for (n = 0; n < 4; n++)
283 regs[n] = rd32(E1000_RXDCTL(n));
284 break;
285 case E1000_RDBAL(0):
286 for (n = 0; n < 4; n++)
287 regs[n] = rd32(E1000_RDBAL(n));
288 break;
289 case E1000_RDBAH(0):
290 for (n = 0; n < 4; n++)
291 regs[n] = rd32(E1000_RDBAH(n));
292 break;
293 case E1000_TDBAL(0):
294 for (n = 0; n < 4; n++)
295 			regs[n] = rd32(E1000_TDBAL(n));
296 break;
297 case E1000_TDBAH(0):
298 for (n = 0; n < 4; n++)
299 regs[n] = rd32(E1000_TDBAH(n));
300 break;
301 case E1000_TDLEN(0):
302 for (n = 0; n < 4; n++)
303 regs[n] = rd32(E1000_TDLEN(n));
304 break;
305 case E1000_TDH(0):
306 for (n = 0; n < 4; n++)
307 regs[n] = rd32(E1000_TDH(n));
308 break;
309 case E1000_TDT(0):
310 for (n = 0; n < 4; n++)
311 regs[n] = rd32(E1000_TDT(n));
312 break;
313 case E1000_TXDCTL(0):
314 for (n = 0; n < 4; n++)
315 regs[n] = rd32(E1000_TXDCTL(n));
316 break;
317 default:
318 printk(KERN_INFO "%-15s %08x\n",
319 reginfo->name, rd32(reginfo->ofs));
320 return;
321 }
322
323 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
324 printk(KERN_INFO "%-15s ", rname);
325 for (n = 0; n < 4; n++)
326 printk(KERN_CONT "%08x ", regs[n]);
327 printk(KERN_CONT "\n");
328 }
329
330 /*
331 * igb_dump - Print registers, tx-rings and rx-rings
332 */
333 static void igb_dump(struct igb_adapter *adapter)
334 {
335 struct net_device *netdev = adapter->netdev;
336 struct e1000_hw *hw = &adapter->hw;
337 struct igb_reg_info *reginfo;
338 int n = 0;
339 struct igb_ring *tx_ring;
340 union e1000_adv_tx_desc *tx_desc;
341 struct my_u0 { u64 a; u64 b; } *u0;
342 struct igb_buffer *buffer_info;
343 struct igb_ring *rx_ring;
344 union e1000_adv_rx_desc *rx_desc;
345 u32 staterr;
346 int i = 0;
347
348 if (!netif_msg_hw(adapter))
349 return;
350
351 /* Print netdevice Info */
352 if (netdev) {
353 dev_info(&adapter->pdev->dev, "Net device Info\n");
354 printk(KERN_INFO "Device Name state "
355 "trans_start last_rx\n");
356 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
357 netdev->name,
358 netdev->state,
359 netdev->trans_start,
360 netdev->last_rx);
361 }
362
363 /* Print Registers */
364 dev_info(&adapter->pdev->dev, "Register Dump\n");
365 printk(KERN_INFO " Register Name Value\n");
366 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
367 reginfo->name; reginfo++) {
368 igb_regdump(hw, reginfo);
369 }
370
371 /* Print TX Ring Summary */
372 if (!netdev || !netif_running(netdev))
373 goto exit;
374
375 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
376 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
377 " leng ntw timestamp\n");
378 for (n = 0; n < adapter->num_tx_queues; n++) {
379 tx_ring = adapter->tx_ring[n];
380 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
381 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
382 n, tx_ring->next_to_use, tx_ring->next_to_clean,
383 (u64)buffer_info->dma,
384 buffer_info->length,
385 buffer_info->next_to_watch,
386 (u64)buffer_info->time_stamp);
387 }
388
389 /* Print TX Rings */
390 if (!netif_msg_tx_done(adapter))
391 goto rx_ring_summary;
392
393 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394
395 /* Transmit Descriptor Formats
396 *
397 * Advanced Transmit Descriptor
398 * +--------------------------------------------------------------+
399 * 0 | Buffer Address [63:0] |
400 * +--------------------------------------------------------------+
401 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
402 * +--------------------------------------------------------------+
403 * 63 46 45 40 39 38 36 35 32 31 24 15 0
404 */
405
406 for (n = 0; n < adapter->num_tx_queues; n++) {
407 tx_ring = adapter->tx_ring[n];
408 printk(KERN_INFO "------------------------------------\n");
409 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
410 printk(KERN_INFO "------------------------------------\n");
411 printk(KERN_INFO "T [desc] [address 63:0 ] "
412 "[PlPOCIStDDM Ln] [bi->dma ] "
413 "leng ntw timestamp bi->skb\n");
414
415 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
416 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
417 buffer_info = &tx_ring->buffer_info[i];
418 u0 = (struct my_u0 *)tx_desc;
419 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
420 " %04X %3X %016llX %p", i,
421 le64_to_cpu(u0->a),
422 le64_to_cpu(u0->b),
423 (u64)buffer_info->dma,
424 buffer_info->length,
425 buffer_info->next_to_watch,
426 (u64)buffer_info->time_stamp,
427 buffer_info->skb);
428 if (i == tx_ring->next_to_use &&
429 i == tx_ring->next_to_clean)
430 printk(KERN_CONT " NTC/U\n");
431 else if (i == tx_ring->next_to_use)
432 printk(KERN_CONT " NTU\n");
433 else if (i == tx_ring->next_to_clean)
434 printk(KERN_CONT " NTC\n");
435 else
436 printk(KERN_CONT "\n");
437
438 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
439 print_hex_dump(KERN_INFO, "",
440 DUMP_PREFIX_ADDRESS,
441 16, 1, phys_to_virt(buffer_info->dma),
442 buffer_info->length, true);
443 }
444 }
445
446 /* Print RX Rings Summary */
447 rx_ring_summary:
448 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
449 printk(KERN_INFO "Queue [NTU] [NTC]\n");
450 for (n = 0; n < adapter->num_rx_queues; n++) {
451 rx_ring = adapter->rx_ring[n];
452 printk(KERN_INFO " %5d %5X %5X\n", n,
453 rx_ring->next_to_use, rx_ring->next_to_clean);
454 }
455
456 /* Print RX Rings */
457 if (!netif_msg_rx_status(adapter))
458 goto exit;
459
460 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461
462 /* Advanced Receive Descriptor (Read) Format
463 * 63 1 0
464 * +-----------------------------------------------------+
465 * 0 | Packet Buffer Address [63:1] |A0/NSE|
466 * +----------------------------------------------+------+
467 * 8 | Header Buffer Address [63:1] | DD |
468 * +-----------------------------------------------------+
469 *
470 *
471 * Advanced Receive Descriptor (Write-Back) Format
472 *
473 * 63 48 47 32 31 30 21 20 17 16 4 3 0
474 * +------------------------------------------------------+
475 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
476 * | Checksum Ident | | | | Type | Type |
477 * +------------------------------------------------------+
478 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
479 * +------------------------------------------------------+
480 * 63 48 47 32 31 20 19 0
481 */
482
483 for (n = 0; n < adapter->num_rx_queues; n++) {
484 rx_ring = adapter->rx_ring[n];
485 printk(KERN_INFO "------------------------------------\n");
486 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
487 printk(KERN_INFO "------------------------------------\n");
488 printk(KERN_INFO "R [desc] [ PktBuf A0] "
489 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
490 "<-- Adv Rx Read format\n");
491 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
492 "[vl er S cks ln] ---------------- [bi->skb] "
493 "<-- Adv Rx Write-Back format\n");
494
495 for (i = 0; i < rx_ring->count; i++) {
496 buffer_info = &rx_ring->buffer_info[i];
497 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
498 u0 = (struct my_u0 *)rx_desc;
499 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
500 if (staterr & E1000_RXD_STAT_DD) {
501 /* Descriptor Done */
502 printk(KERN_INFO "RWB[0x%03X] %016llX "
503 "%016llX ---------------- %p", i,
504 le64_to_cpu(u0->a),
505 le64_to_cpu(u0->b),
506 buffer_info->skb);
507 } else {
508 printk(KERN_INFO "R [0x%03X] %016llX "
509 "%016llX %016llX %p", i,
510 le64_to_cpu(u0->a),
511 le64_to_cpu(u0->b),
512 (u64)buffer_info->dma,
513 buffer_info->skb);
514
515 if (netif_msg_pktdata(adapter)) {
516 print_hex_dump(KERN_INFO, "",
517 DUMP_PREFIX_ADDRESS,
518 16, 1,
519 phys_to_virt(buffer_info->dma),
520 rx_ring->rx_buffer_len, true);
521 if (rx_ring->rx_buffer_len
522 < IGB_RXBUFFER_1024)
523 print_hex_dump(KERN_INFO, "",
524 DUMP_PREFIX_ADDRESS,
525 16, 1,
526 phys_to_virt(
527 buffer_info->page_dma +
528 buffer_info->page_offset),
529 PAGE_SIZE/2, true);
530 }
531 }
532
533 if (i == rx_ring->next_to_use)
534 printk(KERN_CONT " NTU\n");
535 else if (i == rx_ring->next_to_clean)
536 printk(KERN_CONT " NTC\n");
537 else
538 printk(KERN_CONT "\n");
539
540 }
541 }
542
543 exit:
544 return;
545 }
546
547
548 /**
549 * igb_read_clock - read raw cycle counter (to be used by time counter)
550 */
551 static cycle_t igb_read_clock(const struct cyclecounter *tc)
552 {
553 struct igb_adapter *adapter =
554 container_of(tc, struct igb_adapter, cycles);
555 struct e1000_hw *hw = &adapter->hw;
556 u64 stamp = 0;
557 int shift = 0;
558
559 /*
560 * The timestamp latches on lowest register read. For the 82580
561 * the lowest register is SYSTIMR instead of SYSTIML. However we never
562 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
563 */
564 if (hw->mac.type == e1000_82580) {
565 stamp = rd32(E1000_SYSTIMR) >> 8;
566 shift = IGB_82580_TSYNC_SHIFT;
567 }
568
569 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
570 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
571 return stamp;
572 }
573
574 /**
575 * igb_get_hw_dev - return device
576 * used by hardware layer to print debugging information
577 **/
578 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
579 {
580 struct igb_adapter *adapter = hw->back;
581 return adapter->netdev;
582 }
583
584 /**
585 * igb_init_module - Driver Registration Routine
586 *
587 * igb_init_module is the first routine called when the driver is
588 * loaded. All it does is register with the PCI subsystem.
589 **/
590 static int __init igb_init_module(void)
591 {
592 int ret;
593 printk(KERN_INFO "%s - version %s\n",
594 igb_driver_string, igb_driver_version);
595
596 printk(KERN_INFO "%s\n", igb_copyright);
597
598 #ifdef CONFIG_IGB_DCA
599 dca_register_notify(&dca_notifier);
600 #endif
601 ret = pci_register_driver(&igb_driver);
602 return ret;
603 }
604
605 module_init(igb_init_module);
606
607 /**
608 * igb_exit_module - Driver Exit Cleanup Routine
609 *
610 * igb_exit_module is called just before the driver is removed
611 * from memory.
612 **/
613 static void __exit igb_exit_module(void)
614 {
615 #ifdef CONFIG_IGB_DCA
616 dca_unregister_notify(&dca_notifier);
617 #endif
618 pci_unregister_driver(&igb_driver);
619 }
620
621 module_exit(igb_exit_module);
622
623 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
624 /**
625 * igb_cache_ring_register - Descriptor ring to register mapping
626 * @adapter: board private structure to initialize
627 *
628 * Once we know the feature-set enabled for the device, we'll cache
629 * the register offset the descriptor ring is assigned to.
630 **/
631 static void igb_cache_ring_register(struct igb_adapter *adapter)
632 {
633 int i = 0, j = 0;
634 u32 rbase_offset = adapter->vfs_allocated_count;
635
636 switch (adapter->hw.mac.type) {
637 case e1000_82576:
638 /* The queues are allocated for virtualization such that VF 0
639 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
640 * In order to avoid collision we start at the first free queue
641 * and continue consuming queues in the same sequence
642 */
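/* Q_IDX_82576(i) maps consecutive indices i = 0,1,2,3,... onto the
 * interleaved offsets 0,8,1,9,..., mirroring the VF pairing described
 * above, so adding rbase_offset starts the PF at the first free pair. */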
643 if (adapter->vfs_allocated_count) {
644 for (; i < adapter->rss_queues; i++)
645 adapter->rx_ring[i]->reg_idx = rbase_offset +
646 Q_IDX_82576(i);
647 }
648 case e1000_82575:
649 case e1000_82580:
650 case e1000_i350:
651 default:
652 for (; i < adapter->num_rx_queues; i++)
653 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654 for (; j < adapter->num_tx_queues; j++)
655 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656 break;
657 }
658 }
659
660 static void igb_free_queues(struct igb_adapter *adapter)
661 {
662 int i;
663
664 for (i = 0; i < adapter->num_tx_queues; i++) {
665 kfree(adapter->tx_ring[i]);
666 adapter->tx_ring[i] = NULL;
667 }
668 for (i = 0; i < adapter->num_rx_queues; i++) {
669 kfree(adapter->rx_ring[i]);
670 adapter->rx_ring[i] = NULL;
671 }
672 adapter->num_rx_queues = 0;
673 adapter->num_tx_queues = 0;
674 }
675
676 /**
677 * igb_alloc_queues - Allocate memory for all rings
678 * @adapter: board private structure to initialize
679 *
680 * We allocate one ring per queue at run-time since we don't know the
681 * number of queues at compile-time.
682 **/
683 static int igb_alloc_queues(struct igb_adapter *adapter)
684 {
685 struct igb_ring *ring;
686 int i;
687
688 for (i = 0; i < adapter->num_tx_queues; i++) {
689 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
690 if (!ring)
691 goto err;
692 ring->count = adapter->tx_ring_count;
693 ring->queue_index = i;
694 ring->dev = &adapter->pdev->dev;
695 ring->netdev = adapter->netdev;
696 /* For 82575, context index must be unique per ring. */
697 if (adapter->hw.mac.type == e1000_82575)
698 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
699 adapter->tx_ring[i] = ring;
700 }
701
702 for (i = 0; i < adapter->num_rx_queues; i++) {
703 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
704 if (!ring)
705 goto err;
706 ring->count = adapter->rx_ring_count;
707 ring->queue_index = i;
708 ring->dev = &adapter->pdev->dev;
709 ring->netdev = adapter->netdev;
710 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
711 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
712 /* set flag indicating ring supports SCTP checksum offload */
713 if (adapter->hw.mac.type >= e1000_82576)
714 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
715 adapter->rx_ring[i] = ring;
716 }
717
718 igb_cache_ring_register(adapter);
719
720 return 0;
721
722 err:
723 igb_free_queues(adapter);
724
725 return -ENOMEM;
726 }
727
728 #define IGB_N0_QUEUE -1
729 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
730 {
731 u32 msixbm = 0;
732 struct igb_adapter *adapter = q_vector->adapter;
733 struct e1000_hw *hw = &adapter->hw;
734 u32 ivar, index;
735 int rx_queue = IGB_N0_QUEUE;
736 int tx_queue = IGB_N0_QUEUE;
737
738 if (q_vector->rx_ring)
739 rx_queue = q_vector->rx_ring->reg_idx;
740 if (q_vector->tx_ring)
741 tx_queue = q_vector->tx_ring->reg_idx;
742
743 switch (hw->mac.type) {
744 case e1000_82575:
745 /* The 82575 assigns vectors using a bitmask, which matches the
746 bitmask for the EICR/EIMS/EIMC registers. To assign one
747 or more queues to a vector, we write the appropriate bits
748 into the MSIXBM register for that vector. */
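/* e.g. Rx queue 2 and Tx queue 2 sharing one vector gives
 * msixbm = (E1000_EICR_RX_QUEUE0 << 2) | (E1000_EICR_TX_QUEUE0 << 2). */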
749 if (rx_queue > IGB_N0_QUEUE)
750 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
751 if (tx_queue > IGB_N0_QUEUE)
752 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
753 if (!adapter->msix_entries && msix_vector == 0)
754 msixbm |= E1000_EIMS_OTHER;
755 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
756 q_vector->eims_value = msixbm;
757 break;
758 case e1000_82576:
759 /* 82576 uses a table-based method for assigning vectors.
760 Each queue has a single entry in the table to which we write
761 a vector number along with a "valid" bit. Sadly, the layout
762 of the table is somewhat counterintuitive. */
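/* Concretely, for the code below: IVAR0 entry (queue & 0x7) holds four
 * vector fields; Rx queues 0-7 land in the low byte and Rx queues 8-15
 * in the third byte, while Tx queues 0-7 use the second byte and Tx
 * queues 8-15 the high byte, each OR'd with E1000_IVAR_VALID. */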
763 if (rx_queue > IGB_N0_QUEUE) {
764 index = (rx_queue & 0x7);
765 ivar = array_rd32(E1000_IVAR0, index);
766 if (rx_queue < 8) {
767 /* vector goes into low byte of register */
768 ivar = ivar & 0xFFFFFF00;
769 ivar |= msix_vector | E1000_IVAR_VALID;
770 } else {
771 /* vector goes into third byte of register */
772 ivar = ivar & 0xFF00FFFF;
773 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
774 }
775 array_wr32(E1000_IVAR0, index, ivar);
776 }
777 if (tx_queue > IGB_N0_QUEUE) {
778 index = (tx_queue & 0x7);
779 ivar = array_rd32(E1000_IVAR0, index);
780 if (tx_queue < 8) {
781 /* vector goes into second byte of register */
782 ivar = ivar & 0xFFFF00FF;
783 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
784 } else {
785 /* vector goes into high byte of register */
786 ivar = ivar & 0x00FFFFFF;
787 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
788 }
789 array_wr32(E1000_IVAR0, index, ivar);
790 }
791 q_vector->eims_value = 1 << msix_vector;
792 break;
793 case e1000_82580:
794 case e1000_i350:
795 		/* 82580 uses the same table-based approach as 82576 but has fewer
796 		   entries; as a result each entry serves a pair of queues (index = queue >> 1). */
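/* Even-numbered queues use the low (Rx) or second (Tx) byte of the entry;
 * odd-numbered queues use the third (Rx) or high (Tx) byte, as coded below. */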
797 if (rx_queue > IGB_N0_QUEUE) {
798 index = (rx_queue >> 1);
799 ivar = array_rd32(E1000_IVAR0, index);
800 if (rx_queue & 0x1) {
801 /* vector goes into third byte of register */
802 ivar = ivar & 0xFF00FFFF;
803 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
804 } else {
805 /* vector goes into low byte of register */
806 ivar = ivar & 0xFFFFFF00;
807 ivar |= msix_vector | E1000_IVAR_VALID;
808 }
809 array_wr32(E1000_IVAR0, index, ivar);
810 }
811 if (tx_queue > IGB_N0_QUEUE) {
812 index = (tx_queue >> 1);
813 ivar = array_rd32(E1000_IVAR0, index);
814 if (tx_queue & 0x1) {
815 /* vector goes into high byte of register */
816 ivar = ivar & 0x00FFFFFF;
817 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
818 } else {
819 /* vector goes into second byte of register */
820 ivar = ivar & 0xFFFF00FF;
821 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
822 }
823 array_wr32(E1000_IVAR0, index, ivar);
824 }
825 q_vector->eims_value = 1 << msix_vector;
826 break;
827 default:
828 BUG();
829 break;
830 }
831
832 /* add q_vector eims value to global eims_enable_mask */
833 adapter->eims_enable_mask |= q_vector->eims_value;
834
835 /* configure q_vector to set itr on first interrupt */
836 q_vector->set_itr = 1;
837 }
838
839 /**
840 * igb_configure_msix - Configure MSI-X hardware
841 *
842 * igb_configure_msix sets up the hardware to properly
843 * generate MSI-X interrupts.
844 **/
845 static void igb_configure_msix(struct igb_adapter *adapter)
846 {
847 u32 tmp;
848 int i, vector = 0;
849 struct e1000_hw *hw = &adapter->hw;
850
851 adapter->eims_enable_mask = 0;
852
853 /* set vector for other causes, i.e. link changes */
854 switch (hw->mac.type) {
855 case e1000_82575:
856 tmp = rd32(E1000_CTRL_EXT);
857 /* enable MSI-X PBA support*/
858 tmp |= E1000_CTRL_EXT_PBA_CLR;
859
860 /* Auto-Mask interrupts upon ICR read. */
861 tmp |= E1000_CTRL_EXT_EIAME;
862 tmp |= E1000_CTRL_EXT_IRCA;
863
864 wr32(E1000_CTRL_EXT, tmp);
865
866 /* enable msix_other interrupt */
867 array_wr32(E1000_MSIXBM(0), vector++,
868 E1000_EIMS_OTHER);
869 adapter->eims_other = E1000_EIMS_OTHER;
870
871 break;
872
873 case e1000_82576:
874 case e1000_82580:
875 case e1000_i350:
876 /* Turn on MSI-X capability first, or our settings
877 * won't stick. And it will take days to debug. */
878 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
879 E1000_GPIE_PBA | E1000_GPIE_EIAME |
880 E1000_GPIE_NSICR);
881
882 /* enable msix_other interrupt */
883 adapter->eims_other = 1 << vector;
884 tmp = (vector++ | E1000_IVAR_VALID) << 8;
885
886 wr32(E1000_IVAR_MISC, tmp);
887 break;
888 default:
889 /* do nothing, since nothing else supports MSI-X */
890 break;
891 } /* switch (hw->mac.type) */
892
893 adapter->eims_enable_mask |= adapter->eims_other;
894
895 for (i = 0; i < adapter->num_q_vectors; i++)
896 igb_assign_vector(adapter->q_vector[i], vector++);
897
898 wrfl();
899 }
900
901 /**
902 * igb_request_msix - Initialize MSI-X interrupts
903 *
904 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
905 * kernel.
906 **/
907 static int igb_request_msix(struct igb_adapter *adapter)
908 {
909 struct net_device *netdev = adapter->netdev;
910 struct e1000_hw *hw = &adapter->hw;
911 int i, err = 0, vector = 0;
912
913 err = request_irq(adapter->msix_entries[vector].vector,
914 igb_msix_other, 0, netdev->name, adapter);
915 if (err)
916 goto out;
917 vector++;
918
919 for (i = 0; i < adapter->num_q_vectors; i++) {
920 struct igb_q_vector *q_vector = adapter->q_vector[i];
921
922 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
923
924 if (q_vector->rx_ring && q_vector->tx_ring)
925 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
926 q_vector->rx_ring->queue_index);
927 else if (q_vector->tx_ring)
928 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
929 q_vector->tx_ring->queue_index);
930 else if (q_vector->rx_ring)
931 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
932 q_vector->rx_ring->queue_index);
933 else
934 sprintf(q_vector->name, "%s-unused", netdev->name);
935
936 err = request_irq(adapter->msix_entries[vector].vector,
937 igb_msix_ring, 0, q_vector->name,
938 q_vector);
939 if (err)
940 goto out;
941 vector++;
942 }
943
944 igb_configure_msix(adapter);
945 return 0;
946 out:
947 return err;
948 }
949
950 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
951 {
952 if (adapter->msix_entries) {
953 pci_disable_msix(adapter->pdev);
954 kfree(adapter->msix_entries);
955 adapter->msix_entries = NULL;
956 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
957 pci_disable_msi(adapter->pdev);
958 }
959 }
960
961 /**
962 * igb_free_q_vectors - Free memory allocated for interrupt vectors
963 * @adapter: board private structure to initialize
964 *
965 * This function frees the memory allocated to the q_vectors. In addition if
966 * NAPI is enabled it will delete any references to the NAPI struct prior
967 * to freeing the q_vector.
968 **/
969 static void igb_free_q_vectors(struct igb_adapter *adapter)
970 {
971 int v_idx;
972
973 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
974 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
975 adapter->q_vector[v_idx] = NULL;
976 if (!q_vector)
977 continue;
978 netif_napi_del(&q_vector->napi);
979 kfree(q_vector);
980 }
981 adapter->num_q_vectors = 0;
982 }
983
984 /**
985 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
986 *
987 * This function resets the device so that it has 0 rx queues, tx queues, and
988 * MSI-X interrupts allocated.
989 */
990 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
991 {
992 igb_free_queues(adapter);
993 igb_free_q_vectors(adapter);
994 igb_reset_interrupt_capability(adapter);
995 }
996
997 /**
998 * igb_set_interrupt_capability - set MSI or MSI-X if supported
999 *
1000 * Attempt to configure interrupts using the best available
1001 * capabilities of the hardware and kernel.
1002 **/
1003 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1004 {
1005 int err;
1006 int numvecs, i;
1007
1008 /* Number of supported queues. */
1009 adapter->num_rx_queues = adapter->rss_queues;
1010 if (adapter->vfs_allocated_count)
1011 adapter->num_tx_queues = 1;
1012 else
1013 adapter->num_tx_queues = adapter->rss_queues;
1014
1015 /* start with one vector for every rx queue */
1016 numvecs = adapter->num_rx_queues;
1017
1018 /* if tx handler is separate add 1 for every tx queue */
1019 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1020 numvecs += adapter->num_tx_queues;
1021
1022 /* store the number of vectors reserved for queues */
1023 adapter->num_q_vectors = numvecs;
1024
1025 /* add 1 vector for link status interrupts */
1026 numvecs++;
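/* e.g. with 4 RSS queues and no VFs: queue pairing disabled requests
 * 4 (Rx) + 4 (Tx) + 1 (link/other) = 9 MSI-X vectors; with pairing
 * enabled it is 4 + 1 = 5. */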
1027 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1028 GFP_KERNEL);
1029 if (!adapter->msix_entries)
1030 goto msi_only;
1031
1032 for (i = 0; i < numvecs; i++)
1033 adapter->msix_entries[i].entry = i;
1034
1035 err = pci_enable_msix(adapter->pdev,
1036 adapter->msix_entries,
1037 numvecs);
1038 if (err == 0)
1039 goto out;
1040
1041 igb_reset_interrupt_capability(adapter);
1042
1043 /* If we can't do MSI-X, try MSI */
1044 msi_only:
1045 #ifdef CONFIG_PCI_IOV
1046 /* disable SR-IOV for non MSI-X configurations */
1047 if (adapter->vf_data) {
1048 struct e1000_hw *hw = &adapter->hw;
1049 /* disable iov and allow time for transactions to clear */
1050 pci_disable_sriov(adapter->pdev);
1051 msleep(500);
1052
1053 kfree(adapter->vf_data);
1054 adapter->vf_data = NULL;
1055 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1056 wrfl();
1057 msleep(100);
1058 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1059 }
1060 #endif
1061 adapter->vfs_allocated_count = 0;
1062 adapter->rss_queues = 1;
1063 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1064 adapter->num_rx_queues = 1;
1065 adapter->num_tx_queues = 1;
1066 adapter->num_q_vectors = 1;
1067 if (!pci_enable_msi(adapter->pdev))
1068 adapter->flags |= IGB_FLAG_HAS_MSI;
1069 out:
1070 /* Notify the stack of the (possibly) reduced queue counts. */
1071 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1072 return netif_set_real_num_rx_queues(adapter->netdev,
1073 adapter->num_rx_queues);
1074 }
1075
1076 /**
1077 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1078 * @adapter: board private structure to initialize
1079 *
1080 * We allocate one q_vector per queue interrupt. If allocation fails we
1081 * return -ENOMEM.
1082 **/
1083 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1084 {
1085 struct igb_q_vector *q_vector;
1086 struct e1000_hw *hw = &adapter->hw;
1087 int v_idx;
1088
1089 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1090 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1091 if (!q_vector)
1092 goto err_out;
1093 q_vector->adapter = adapter;
1094 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1095 q_vector->itr_val = IGB_START_ITR;
1096 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1097 adapter->q_vector[v_idx] = q_vector;
1098 }
1099 return 0;
1100
1101 err_out:
1102 igb_free_q_vectors(adapter);
1103 return -ENOMEM;
1104 }
1105
1106 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1107 int ring_idx, int v_idx)
1108 {
1109 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1110
1111 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1112 q_vector->rx_ring->q_vector = q_vector;
1113 q_vector->itr_val = adapter->rx_itr_setting;
1114 if (q_vector->itr_val && q_vector->itr_val <= 3)
1115 q_vector->itr_val = IGB_START_ITR;
1116 }
1117
1118 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1119 int ring_idx, int v_idx)
1120 {
1121 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1122
1123 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1124 q_vector->tx_ring->q_vector = q_vector;
1125 q_vector->itr_val = adapter->tx_itr_setting;
1126 if (q_vector->itr_val && q_vector->itr_val <= 3)
1127 q_vector->itr_val = IGB_START_ITR;
1128 }
1129
1130 /**
1131 * igb_map_ring_to_vector - maps allocated queues to vectors
1132 *
1133 * This function maps the recently allocated queues to vectors.
1134 **/
1135 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1136 {
1137 int i;
1138 int v_idx = 0;
1139
1140 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1141 (adapter->num_q_vectors < adapter->num_tx_queues))
1142 return -ENOMEM;
1143
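/* Example: with 4 Rx and 4 Tx queues, 8 or more q_vectors gives every
 * ring its own vector; with only 4 q_vectors, Tx ring i and Rx ring i
 * share vector i (queue pairing). */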
1144 if (adapter->num_q_vectors >=
1145 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1146 for (i = 0; i < adapter->num_rx_queues; i++)
1147 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1148 for (i = 0; i < adapter->num_tx_queues; i++)
1149 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1150 } else {
1151 for (i = 0; i < adapter->num_rx_queues; i++) {
1152 if (i < adapter->num_tx_queues)
1153 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1154 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1155 }
1156 for (; i < adapter->num_tx_queues; i++)
1157 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1158 }
1159 return 0;
1160 }
1161
1162 /**
1163 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1164 *
1165 * This function initializes the interrupts and allocates all of the queues.
1166 **/
1167 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1168 {
1169 struct pci_dev *pdev = adapter->pdev;
1170 int err;
1171
1172 err = igb_set_interrupt_capability(adapter);
1173 if (err)
1174 return err;
1175
1176 err = igb_alloc_q_vectors(adapter);
1177 if (err) {
1178 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1179 goto err_alloc_q_vectors;
1180 }
1181
1182 err = igb_alloc_queues(adapter);
1183 if (err) {
1184 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1185 goto err_alloc_queues;
1186 }
1187
1188 err = igb_map_ring_to_vector(adapter);
1189 if (err) {
1190 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1191 goto err_map_queues;
1192 }
1193
1194
1195 return 0;
1196 err_map_queues:
1197 igb_free_queues(adapter);
1198 err_alloc_queues:
1199 igb_free_q_vectors(adapter);
1200 err_alloc_q_vectors:
1201 igb_reset_interrupt_capability(adapter);
1202 return err;
1203 }
1204
1205 /**
1206 * igb_request_irq - initialize interrupts
1207 *
1208 * Attempts to configure interrupts using the best available
1209 * capabilities of the hardware and kernel.
1210 **/
1211 static int igb_request_irq(struct igb_adapter *adapter)
1212 {
1213 struct net_device *netdev = adapter->netdev;
1214 struct pci_dev *pdev = adapter->pdev;
1215 int err = 0;
1216
1217 if (adapter->msix_entries) {
1218 err = igb_request_msix(adapter);
1219 if (!err)
1220 goto request_done;
1221 /* fall back to MSI */
1222 igb_clear_interrupt_scheme(adapter);
1223 if (!pci_enable_msi(adapter->pdev))
1224 adapter->flags |= IGB_FLAG_HAS_MSI;
1225 igb_free_all_tx_resources(adapter);
1226 igb_free_all_rx_resources(adapter);
1227 adapter->num_tx_queues = 1;
1228 adapter->num_rx_queues = 1;
1229 adapter->num_q_vectors = 1;
1230 err = igb_alloc_q_vectors(adapter);
1231 if (err) {
1232 dev_err(&pdev->dev,
1233 "Unable to allocate memory for vectors\n");
1234 goto request_done;
1235 }
1236 err = igb_alloc_queues(adapter);
1237 if (err) {
1238 dev_err(&pdev->dev,
1239 "Unable to allocate memory for queues\n");
1240 igb_free_q_vectors(adapter);
1241 goto request_done;
1242 }
1243 igb_setup_all_tx_resources(adapter);
1244 igb_setup_all_rx_resources(adapter);
1245 } else {
1246 igb_assign_vector(adapter->q_vector[0], 0);
1247 }
1248
1249 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1250 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1251 netdev->name, adapter);
1252 if (!err)
1253 goto request_done;
1254
1255 /* fall back to legacy interrupts */
1256 igb_reset_interrupt_capability(adapter);
1257 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1258 }
1259
1260 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1261 netdev->name, adapter);
1262
1263 if (err)
1264 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1265 err);
1266
1267 request_done:
1268 return err;
1269 }
1270
1271 static void igb_free_irq(struct igb_adapter *adapter)
1272 {
1273 if (adapter->msix_entries) {
1274 int vector = 0, i;
1275
1276 free_irq(adapter->msix_entries[vector++].vector, adapter);
1277
1278 for (i = 0; i < adapter->num_q_vectors; i++) {
1279 struct igb_q_vector *q_vector = adapter->q_vector[i];
1280 free_irq(adapter->msix_entries[vector++].vector,
1281 q_vector);
1282 }
1283 } else {
1284 free_irq(adapter->pdev->irq, adapter);
1285 }
1286 }
1287
1288 /**
1289 * igb_irq_disable - Mask off interrupt generation on the NIC
1290 * @adapter: board private structure
1291 **/
1292 static void igb_irq_disable(struct igb_adapter *adapter)
1293 {
1294 struct e1000_hw *hw = &adapter->hw;
1295
1296 /*
1297 * we need to be careful when disabling interrupts. The VFs are also
1298 * mapped into these registers and so clearing the bits can cause
1299 * issues on the VF drivers so we only need to clear what we set
1300 */
1301 if (adapter->msix_entries) {
1302 u32 regval = rd32(E1000_EIAM);
1303 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1304 wr32(E1000_EIMC, adapter->eims_enable_mask);
1305 regval = rd32(E1000_EIAC);
1306 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1307 }
1308
1309 wr32(E1000_IAM, 0);
1310 wr32(E1000_IMC, ~0);
1311 wrfl();
1312 if (adapter->msix_entries) {
1313 int i;
1314 for (i = 0; i < adapter->num_q_vectors; i++)
1315 synchronize_irq(adapter->msix_entries[i].vector);
1316 } else {
1317 synchronize_irq(adapter->pdev->irq);
1318 }
1319 }
1320
1321 /**
1322 * igb_irq_enable - Enable default interrupt generation settings
1323 * @adapter: board private structure
1324 **/
1325 static void igb_irq_enable(struct igb_adapter *adapter)
1326 {
1327 struct e1000_hw *hw = &adapter->hw;
1328
1329 if (adapter->msix_entries) {
1330 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1331 u32 regval = rd32(E1000_EIAC);
1332 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1333 regval = rd32(E1000_EIAM);
1334 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1335 wr32(E1000_EIMS, adapter->eims_enable_mask);
1336 if (adapter->vfs_allocated_count) {
1337 wr32(E1000_MBVFIMR, 0xFF);
1338 ims |= E1000_IMS_VMMB;
1339 }
1340 if (adapter->hw.mac.type == e1000_82580)
1341 ims |= E1000_IMS_DRSTA;
1342
1343 wr32(E1000_IMS, ims);
1344 } else {
1345 wr32(E1000_IMS, IMS_ENABLE_MASK |
1346 E1000_IMS_DRSTA);
1347 wr32(E1000_IAM, IMS_ENABLE_MASK |
1348 E1000_IMS_DRSTA);
1349 }
1350 }
1351
1352 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1353 {
1354 struct e1000_hw *hw = &adapter->hw;
1355 u16 vid = adapter->hw.mng_cookie.vlan_id;
1356 u16 old_vid = adapter->mng_vlan_id;
1357
1358 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1359 /* add VID to filter table */
1360 igb_vfta_set(hw, vid, true);
1361 adapter->mng_vlan_id = vid;
1362 } else {
1363 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1364 }
1365
1366 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1367 (vid != old_vid) &&
1368 !test_bit(old_vid, adapter->active_vlans)) {
1369 /* remove VID from filter table */
1370 igb_vfta_set(hw, old_vid, false);
1371 }
1372 }
1373
1374 /**
1375 * igb_release_hw_control - release control of the h/w to f/w
1376 * @adapter: address of board private structure
1377 *
1378 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1379 * For ASF and Pass Through versions of f/w this means that the
1380 * driver is no longer loaded.
1381 *
1382 **/
1383 static void igb_release_hw_control(struct igb_adapter *adapter)
1384 {
1385 struct e1000_hw *hw = &adapter->hw;
1386 u32 ctrl_ext;
1387
1388 /* Let firmware take over control of h/w */
1389 ctrl_ext = rd32(E1000_CTRL_EXT);
1390 wr32(E1000_CTRL_EXT,
1391 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1392 }
1393
1394 /**
1395 * igb_get_hw_control - get control of the h/w from f/w
1396 * @adapter: address of board private structure
1397 *
1398 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1399 * For ASF and Pass Through versions of f/w this means that
1400 * the driver is loaded.
1401 *
1402 **/
1403 static void igb_get_hw_control(struct igb_adapter *adapter)
1404 {
1405 struct e1000_hw *hw = &adapter->hw;
1406 u32 ctrl_ext;
1407
1408 /* Let firmware know the driver has taken over */
1409 ctrl_ext = rd32(E1000_CTRL_EXT);
1410 wr32(E1000_CTRL_EXT,
1411 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1412 }
1413
1414 /**
1415 * igb_configure - configure the hardware for RX and TX
1416 * @adapter: private board structure
1417 **/
1418 static void igb_configure(struct igb_adapter *adapter)
1419 {
1420 struct net_device *netdev = adapter->netdev;
1421 int i;
1422
1423 igb_get_hw_control(adapter);
1424 igb_set_rx_mode(netdev);
1425
1426 igb_restore_vlan(adapter);
1427
1428 igb_setup_tctl(adapter);
1429 igb_setup_mrqc(adapter);
1430 igb_setup_rctl(adapter);
1431
1432 igb_configure_tx(adapter);
1433 igb_configure_rx(adapter);
1434
1435 igb_rx_fifo_flush_82575(&adapter->hw);
1436
1437 	/* call igb_desc_unused which always leaves
1438 	 * at least 1 descriptor unused to make sure
1439 	 * next_to_use != next_to_clean (else a full ring would look empty) */
1440 for (i = 0; i < adapter->num_rx_queues; i++) {
1441 struct igb_ring *ring = adapter->rx_ring[i];
1442 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1443 }
1444 }
1445
1446 /**
1447 * igb_power_up_link - Power up the phy/serdes link
1448 * @adapter: address of board private structure
1449 **/
1450 void igb_power_up_link(struct igb_adapter *adapter)
1451 {
1452 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1453 igb_power_up_phy_copper(&adapter->hw);
1454 else
1455 igb_power_up_serdes_link_82575(&adapter->hw);
1456 }
1457
1458 /**
1459 * igb_power_down_link - Power down the phy/serdes link
1460 * @adapter: address of board private structure
1461 */
1462 static void igb_power_down_link(struct igb_adapter *adapter)
1463 {
1464 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1465 igb_power_down_phy_copper_82575(&adapter->hw);
1466 else
1467 igb_shutdown_serdes_link_82575(&adapter->hw);
1468 }
1469
1470 /**
1471 * igb_up - Open the interface and prepare it to handle traffic
1472 * @adapter: board private structure
1473 **/
1474 int igb_up(struct igb_adapter *adapter)
1475 {
1476 struct e1000_hw *hw = &adapter->hw;
1477 int i;
1478
1479 /* hardware has been reset, we need to reload some things */
1480 igb_configure(adapter);
1481
1482 clear_bit(__IGB_DOWN, &adapter->state);
1483
1484 for (i = 0; i < adapter->num_q_vectors; i++) {
1485 struct igb_q_vector *q_vector = adapter->q_vector[i];
1486 napi_enable(&q_vector->napi);
1487 }
1488 if (adapter->msix_entries)
1489 igb_configure_msix(adapter);
1490 else
1491 igb_assign_vector(adapter->q_vector[0], 0);
1492
1493 /* Clear any pending interrupts. */
1494 rd32(E1000_ICR);
1495 igb_irq_enable(adapter);
1496
1497 /* notify VFs that reset has been completed */
1498 if (adapter->vfs_allocated_count) {
1499 u32 reg_data = rd32(E1000_CTRL_EXT);
1500 reg_data |= E1000_CTRL_EXT_PFRSTD;
1501 wr32(E1000_CTRL_EXT, reg_data);
1502 }
1503
1504 netif_tx_start_all_queues(adapter->netdev);
1505
1506 /* start the watchdog. */
1507 hw->mac.get_link_status = 1;
1508 schedule_work(&adapter->watchdog_task);
1509
1510 return 0;
1511 }
1512
1513 void igb_down(struct igb_adapter *adapter)
1514 {
1515 struct net_device *netdev = adapter->netdev;
1516 struct e1000_hw *hw = &adapter->hw;
1517 u32 tctl, rctl;
1518 int i;
1519
1520 /* signal that we're down so the interrupt handler does not
1521 * reschedule our watchdog timer */
1522 set_bit(__IGB_DOWN, &adapter->state);
1523
1524 /* disable receives in the hardware */
1525 rctl = rd32(E1000_RCTL);
1526 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1527 /* flush and sleep below */
1528
1529 netif_tx_stop_all_queues(netdev);
1530
1531 /* disable transmits in the hardware */
1532 tctl = rd32(E1000_TCTL);
1533 tctl &= ~E1000_TCTL_EN;
1534 wr32(E1000_TCTL, tctl);
1535 /* flush both disables and wait for them to finish */
1536 wrfl();
1537 msleep(10);
1538
1539 for (i = 0; i < adapter->num_q_vectors; i++) {
1540 struct igb_q_vector *q_vector = adapter->q_vector[i];
1541 napi_disable(&q_vector->napi);
1542 }
1543
1544 igb_irq_disable(adapter);
1545
1546 del_timer_sync(&adapter->watchdog_timer);
1547 del_timer_sync(&adapter->phy_info_timer);
1548
1549 netif_carrier_off(netdev);
1550
1551 /* record the stats before reset*/
1552 spin_lock(&adapter->stats64_lock);
1553 igb_update_stats(adapter, &adapter->stats64);
1554 spin_unlock(&adapter->stats64_lock);
1555
1556 adapter->link_speed = 0;
1557 adapter->link_duplex = 0;
1558
1559 if (!pci_channel_offline(adapter->pdev))
1560 igb_reset(adapter);
1561 igb_clean_all_tx_rings(adapter);
1562 igb_clean_all_rx_rings(adapter);
1563 #ifdef CONFIG_IGB_DCA
1564
1565 /* since we reset the hardware DCA settings were cleared */
1566 igb_setup_dca(adapter);
1567 #endif
1568 }
1569
1570 void igb_reinit_locked(struct igb_adapter *adapter)
1571 {
1572 WARN_ON(in_interrupt());
1573 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1574 msleep(1);
1575 igb_down(adapter);
1576 igb_up(adapter);
1577 clear_bit(__IGB_RESETTING, &adapter->state);
1578 }
1579
1580 void igb_reset(struct igb_adapter *adapter)
1581 {
1582 struct pci_dev *pdev = adapter->pdev;
1583 struct e1000_hw *hw = &adapter->hw;
1584 struct e1000_mac_info *mac = &hw->mac;
1585 struct e1000_fc_info *fc = &hw->fc;
1586 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1587 u16 hwm;
1588
1589 /* Repartition Pba for greater than 9k mtu
1590 * To take effect CTRL.RST is required.
1591 */
1592 switch (mac->type) {
1593 case e1000_i350:
1594 case e1000_82580:
1595 pba = rd32(E1000_RXPBS);
1596 pba = igb_rxpbs_adjust_82580(pba);
1597 break;
1598 case e1000_82576:
1599 pba = rd32(E1000_RXPBS);
1600 pba &= E1000_RXPBS_SIZE_MASK_82576;
1601 break;
1602 case e1000_82575:
1603 default:
1604 pba = E1000_PBA_34K;
1605 break;
1606 }
1607
1608 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1609 (mac->type < e1000_82576)) {
1610 /* adjust PBA for jumbo frames */
1611 wr32(E1000_PBA, pba);
1612
1613 /* To maintain wire speed transmits, the Tx FIFO should be
1614 * large enough to accommodate two full transmit packets,
1615 * rounded up to the next 1KB and expressed in KB. Likewise,
1616 * the Rx FIFO should be large enough to accommodate at least
1617 * one full receive packet and is similarly rounded up and
1618 * expressed in KB. */
1619 pba = rd32(E1000_PBA);
1620 /* upper 16 bits has Tx packet buffer allocation size in KB */
1621 tx_space = pba >> 16;
1622 /* lower 16 bits has Rx packet buffer allocation size in KB */
1623 pba &= 0xffff;
1624 		/* the Tx FIFO also stores 16 bytes of information about the Tx packet;
1625 		 * don't include the Ethernet FCS because hardware appends it */
1626 min_tx_space = (adapter->max_frame_size +
1627 sizeof(union e1000_adv_tx_desc) -
1628 ETH_FCS_LEN) * 2;
1629 min_tx_space = ALIGN(min_tx_space, 1024);
1630 min_tx_space >>= 10;
1631 /* software strips receive CRC, so leave room for it */
1632 min_rx_space = adapter->max_frame_size;
1633 min_rx_space = ALIGN(min_rx_space, 1024);
1634 min_rx_space >>= 10;
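/* Worked example, assuming a 9000-byte MTU (max_frame_size = 9018) and a
 * 16-byte advanced Tx descriptor:
 * min_tx_space = ALIGN((9018 + 16 - 4) * 2, 1024) >> 10 = 18 KB,
 * min_rx_space = ALIGN(9018, 1024) >> 10 = 9 KB. */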
1635
1636 /* If current Tx allocation is less than the min Tx FIFO size,
1637 * and the min Tx FIFO size is less than the current Rx FIFO
1638 * allocation, take space away from current Rx allocation */
1639 if (tx_space < min_tx_space &&
1640 ((min_tx_space - tx_space) < pba)) {
1641 pba = pba - (min_tx_space - tx_space);
1642
1643 /* if short on rx space, rx wins and must trump tx
1644 * adjustment */
1645 if (pba < min_rx_space)
1646 pba = min_rx_space;
1647 }
1648 wr32(E1000_PBA, pba);
1649 }
1650
1651 /* flow control settings */
1652 /* The high water mark must be low enough to fit one full frame
1653 * (or the size used for early receive) above it in the Rx FIFO.
1654 * Set it to the lower of:
1655 * - 90% of the Rx FIFO size, or
1656 * - the full Rx FIFO size minus one full frame */
1657 hwm = min(((pba << 10) * 9 / 10),
1658 ((pba << 10) - 2 * adapter->max_frame_size));
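/* e.g. pba = 34 KB and a 1518-byte max frame:
 * min(34816 * 9 / 10, 34816 - 2 * 1518) = min(31334, 31780) = 31334,
 * which is then rounded down to 16-byte granularity below. */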
1659
1660 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1661 fc->low_water = fc->high_water - 16;
1662 fc->pause_time = 0xFFFF;
1663 fc->send_xon = 1;
1664 fc->current_mode = fc->requested_mode;
1665
1666 /* disable receive for all VFs and wait one second */
1667 if (adapter->vfs_allocated_count) {
1668 int i;
1669 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1670 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1671
1672 /* ping all the active vfs to let them know we are going down */
1673 igb_ping_all_vfs(adapter);
1674
1675 /* disable transmits and receives */
1676 wr32(E1000_VFRE, 0);
1677 wr32(E1000_VFTE, 0);
1678 }
1679
1680 /* Allow time for pending master requests to run */
1681 hw->mac.ops.reset_hw(hw);
1682 wr32(E1000_WUC, 0);
1683
1684 if (hw->mac.ops.init_hw(hw))
1685 dev_err(&pdev->dev, "Hardware Error\n");
1686 if (hw->mac.type > e1000_82580) {
1687 if (adapter->flags & IGB_FLAG_DMAC) {
1688 u32 reg;
1689
1690 /*
1691 * DMA Coalescing high water mark needs to be higher
1692 			 * than the Rx threshold. The Rx threshold is
1693 			 * currently pba - 6, so we should use a high water
1694 			 * mark of pba - 4. */
1695 hwm = (pba - 4) << 10;
1696
1697 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1698 & E1000_DMACR_DMACTHR_MASK);
1699
1700 /* transition to L0x or L1 if available..*/
1701 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1702
1703 			/* watchdog timer = approximately 1000 usec, in 32 usec intervals */
1704 reg |= (1000 >> 5);
1705 wr32(E1000_DMACR, reg);
1706
1707 			/* no lower threshold to disable coalescing (smart FIFO)
1708 			 * -UTRESH=0 */
1709 wr32(E1000_DMCRTRH, 0);
1710
1711 /* set hwm to PBA - 2 * max frame size */
1712 wr32(E1000_FCRTC, hwm);
1713
1714 /*
1715 			 * This sets the time to wait before requesting transition to a
1716 			 * low power state to the number of usecs needed to receive a
1717 			 * 512 byte frame at gigabit line rate
1718 */
1719 reg = rd32(E1000_DMCTLX);
1720 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1721
1722 /* Delay 255 usec before entering Lx state. */
1723 reg |= 0xFF;
1724 wr32(E1000_DMCTLX, reg);
1725
1726 /* free space in Tx packet buffer to wake from DMAC */
1727 wr32(E1000_DMCTXTH,
1728 (IGB_MIN_TXPBSIZE -
1729 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1730 >> 6);
1731
1732 /* make low power state decision controlled by DMAC */
1733 reg = rd32(E1000_PCIEMISC);
1734 reg |= E1000_PCIEMISC_LX_DECISION;
1735 wr32(E1000_PCIEMISC, reg);
1736 } /* end if IGB_FLAG_DMAC set */
1737 }
1738 if (hw->mac.type == e1000_82580) {
1739 u32 reg = rd32(E1000_PCIEMISC);
1740 wr32(E1000_PCIEMISC,
1741 reg & ~E1000_PCIEMISC_LX_DECISION);
1742 }
1743 if (!netif_running(adapter->netdev))
1744 igb_power_down_link(adapter);
1745
1746 igb_update_mng_vlan(adapter);
1747
1748 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1749 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1750
1751 igb_get_phy_info(hw);
1752 }
1753
1754 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1755 {
1756 /*
1757 * Since there is no support for separate rx/tx vlan accel
1758 * enable/disable make sure tx flag is always in same state as rx.
1759 */
1760 if (features & NETIF_F_HW_VLAN_RX)
1761 features |= NETIF_F_HW_VLAN_TX;
1762 else
1763 features &= ~NETIF_F_HW_VLAN_TX;
1764
1765 return features;
1766 }
1767
1768 static int igb_set_features(struct net_device *netdev, u32 features)
1769 {
1770 struct igb_adapter *adapter = netdev_priv(netdev);
1771 int i;
1772 u32 changed = netdev->features ^ features;
1773
1774 for (i = 0; i < adapter->num_rx_queues; i++) {
1775 if (features & NETIF_F_RXCSUM)
1776 adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1777 else
1778 adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1779 }
1780
1781 if (changed & NETIF_F_HW_VLAN_RX)
1782 igb_vlan_mode(netdev, features);
1783
1784 return 0;
1785 }
1786
1787 static const struct net_device_ops igb_netdev_ops = {
1788 .ndo_open = igb_open,
1789 .ndo_stop = igb_close,
1790 .ndo_start_xmit = igb_xmit_frame_adv,
1791 .ndo_get_stats64 = igb_get_stats64,
1792 .ndo_set_rx_mode = igb_set_rx_mode,
1793 .ndo_set_multicast_list = igb_set_rx_mode,
1794 .ndo_set_mac_address = igb_set_mac,
1795 .ndo_change_mtu = igb_change_mtu,
1796 .ndo_do_ioctl = igb_ioctl,
1797 .ndo_tx_timeout = igb_tx_timeout,
1798 .ndo_validate_addr = eth_validate_addr,
1799 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1800 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1801 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1802 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1803 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1804 .ndo_get_vf_config = igb_ndo_get_vf_config,
1805 #ifdef CONFIG_NET_POLL_CONTROLLER
1806 .ndo_poll_controller = igb_netpoll,
1807 #endif
1808 .ndo_fix_features = igb_fix_features,
1809 .ndo_set_features = igb_set_features,
1810 };
1811
1812 /**
1813 * igb_probe - Device Initialization Routine
1814 * @pdev: PCI device information struct
1815 * @ent: entry in igb_pci_tbl
1816 *
1817 * Returns 0 on success, negative on failure
1818 *
1819 * igb_probe initializes an adapter identified by a pci_dev structure.
1820 * The OS initialization, configuring of the adapter private structure,
1821 * and a hardware reset occur.
1822 **/
1823 static int __devinit igb_probe(struct pci_dev *pdev,
1824 const struct pci_device_id *ent)
1825 {
1826 struct net_device *netdev;
1827 struct igb_adapter *adapter;
1828 struct e1000_hw *hw;
1829 u16 eeprom_data = 0;
1830 s32 ret_val;
1831 static int global_quad_port_a; /* global quad port a indication */
1832 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1833 unsigned long mmio_start, mmio_len;
1834 int err, pci_using_dac;
1835 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1836 u8 part_str[E1000_PBANUM_LENGTH];
1837
1838 /* Catch broken hardware that put the wrong VF device ID in
1839 * the PCIe SR-IOV capability.
1840 */
1841 if (pdev->is_virtfn) {
1842 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1843 pci_name(pdev), pdev->vendor, pdev->device);
1844 return -EINVAL;
1845 }
1846
1847 err = pci_enable_device_mem(pdev);
1848 if (err)
1849 return err;
1850
1851 pci_using_dac = 0;
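/* prefer a 64-bit DMA mask and fall back to 32-bit only if the
 * platform cannot provide it */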
1852 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1853 if (!err) {
1854 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1855 if (!err)
1856 pci_using_dac = 1;
1857 } else {
1858 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1859 if (err) {
1860 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1861 if (err) {
1862 dev_err(&pdev->dev, "No usable DMA "
1863 "configuration, aborting\n");
1864 goto err_dma;
1865 }
1866 }
1867 }
1868
1869 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1870 IORESOURCE_MEM),
1871 igb_driver_name);
1872 if (err)
1873 goto err_pci_reg;
1874
1875 pci_enable_pcie_error_reporting(pdev);
1876
1877 pci_set_master(pdev);
1878 pci_save_state(pdev);
1879
1880 err = -ENOMEM;
1881 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1882 IGB_ABS_MAX_TX_QUEUES);
1883 if (!netdev)
1884 goto err_alloc_etherdev;
1885
1886 SET_NETDEV_DEV(netdev, &pdev->dev);
1887
1888 pci_set_drvdata(pdev, netdev);
1889 adapter = netdev_priv(netdev);
1890 adapter->netdev = netdev;
1891 adapter->pdev = pdev;
1892 hw = &adapter->hw;
1893 hw->back = adapter;
1894 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1895
1896 mmio_start = pci_resource_start(pdev, 0);
1897 mmio_len = pci_resource_len(pdev, 0);
1898
1899 err = -EIO;
1900 hw->hw_addr = ioremap(mmio_start, mmio_len);
1901 if (!hw->hw_addr)
1902 goto err_ioremap;
1903
1904 netdev->netdev_ops = &igb_netdev_ops;
1905 igb_set_ethtool_ops(netdev);
1906 netdev->watchdog_timeo = 5 * HZ;
1907
1908 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1909
1910 netdev->mem_start = mmio_start;
1911 netdev->mem_end = mmio_start + mmio_len;
1912
1913 /* PCI config space info */
1914 hw->vendor_id = pdev->vendor;
1915 hw->device_id = pdev->device;
1916 hw->revision_id = pdev->revision;
1917 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1918 hw->subsystem_device_id = pdev->subsystem_device;
1919
1920 /* Copy the default MAC, PHY and NVM function pointers */
1921 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1922 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1923 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1924 /* Initialize skew-specific constants */
1925 err = ei->get_invariants(hw);
1926 if (err)
1927 goto err_sw_init;
1928
1929 /* setup the private structure */
1930 err = igb_sw_init(adapter);
1931 if (err)
1932 goto err_sw_init;
1933
1934 igb_get_bus_info_pcie(hw);
1935
1936 hw->phy.autoneg_wait_to_complete = false;
1937
1938 /* Copper options */
1939 if (hw->phy.media_type == e1000_media_type_copper) {
1940 hw->phy.mdix = AUTO_ALL_MODES;
1941 hw->phy.disable_polarity_correction = false;
1942 hw->phy.ms_type = e1000_ms_hw_default;
1943 }
1944
1945 if (igb_check_reset_block(hw))
1946 dev_info(&pdev->dev,
1947 "PHY reset is blocked due to SOL/IDER session.\n");
1948
1949 netdev->hw_features = NETIF_F_SG |
1950 NETIF_F_IP_CSUM |
1951 NETIF_F_IPV6_CSUM |
1952 NETIF_F_TSO |
1953 NETIF_F_TSO6 |
1954 NETIF_F_RXCSUM |
1955 NETIF_F_HW_VLAN_RX;
1956
1957 netdev->features = netdev->hw_features |
1958 NETIF_F_HW_VLAN_TX |
1959 NETIF_F_HW_VLAN_FILTER;
1960
1961 netdev->vlan_features |= NETIF_F_TSO;
1962 netdev->vlan_features |= NETIF_F_TSO6;
1963 netdev->vlan_features |= NETIF_F_IP_CSUM;
1964 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1965 netdev->vlan_features |= NETIF_F_SG;
1966
1967 if (pci_using_dac) {
1968 netdev->features |= NETIF_F_HIGHDMA;
1969 netdev->vlan_features |= NETIF_F_HIGHDMA;
1970 }
1971
1972 if (hw->mac.type >= e1000_82576) {
1973 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1974 netdev->features |= NETIF_F_SCTP_CSUM;
1975 }
1976
1977 netdev->priv_flags |= IFF_UNICAST_FLT;
1978
1979 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1980
1981 /* before reading the NVM, reset the controller to put the device in a
1982 * known good starting state */
1983 hw->mac.ops.reset_hw(hw);
1984
1985 /* make sure the NVM is good */
1986 if (hw->nvm.ops.validate(hw) < 0) {
1987 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1988 err = -EIO;
1989 goto err_eeprom;
1990 }
1991
1992 /* copy the MAC address out of the NVM */
1993 if (hw->mac.ops.read_mac_addr(hw))
1994 dev_err(&pdev->dev, "NVM Read Error\n");
1995
1996 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1997 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1998
1999 if (!is_valid_ether_addr(netdev->perm_addr)) {
2000 dev_err(&pdev->dev, "Invalid MAC Address\n");
2001 err = -EIO;
2002 goto err_eeprom;
2003 }
2004
2005 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2006 (unsigned long) adapter);
2007 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2008 (unsigned long) adapter);
2009
2010 INIT_WORK(&adapter->reset_task, igb_reset_task);
2011 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2012
2013 /* Initialize link properties that are user-changeable */
2014 adapter->fc_autoneg = true;
2015 hw->mac.autoneg = true;
2016 hw->phy.autoneg_advertised = 0x2f;
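/* 0x2f advertises 10/100 half and full duplex plus 1000 full duplex,
 * i.e. every supported speed/duplex combination (1000 half is never
 * advertised) */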
2017
2018 hw->fc.requested_mode = e1000_fc_default;
2019 hw->fc.current_mode = e1000_fc_default;
2020
2021 igb_validate_mdi_setting(hw);
2022
2023 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
2024 * enable the ACPI Magic Packet filter
2025 */
2026
2027 if (hw->bus.func == 0)
2028 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2029 else if (hw->mac.type >= e1000_82580)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2031 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2032 &eeprom_data);
2033 else if (hw->bus.func == 1)
2034 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2035
2036 if (eeprom_data & eeprom_apme_mask)
2037 adapter->eeprom_wol |= E1000_WUFC_MAG;
2038
2039 /* now that we have the eeprom settings, apply the special cases where
2040 * the eeprom may be wrong or the board simply won't support wake on
2041 * lan on a particular port */
2042 switch (pdev->device) {
2043 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2044 adapter->eeprom_wol = 0;
2045 break;
2046 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2047 case E1000_DEV_ID_82576_FIBER:
2048 case E1000_DEV_ID_82576_SERDES:
2049 /* Wake events only supported on port A for dual fiber
2050 * regardless of eeprom setting */
2051 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2052 adapter->eeprom_wol = 0;
2053 break;
2054 case E1000_DEV_ID_82576_QUAD_COPPER:
2055 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2056 /* if quad port adapter, disable WoL on all but port A */
2057 if (global_quad_port_a != 0)
2058 adapter->eeprom_wol = 0;
2059 else
2060 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2061 /* Reset for multiple quad port adapters */
2062 if (++global_quad_port_a == 4)
2063 global_quad_port_a = 0;
2064 break;
2065 }
2066
2067 /* initialize the wol settings based on the eeprom settings */
2068 adapter->wol = adapter->eeprom_wol;
2069 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2070
2071 /* reset the hardware with the new settings */
2072 igb_reset(adapter);
2073
2074 /* let the f/w know that the h/w is now under the control of the
2075 * driver. */
2076 igb_get_hw_control(adapter);
2077
2078 strcpy(netdev->name, "eth%d");
2079 err = register_netdev(netdev);
2080 if (err)
2081 goto err_register;
2082
2083 igb_vlan_mode(netdev, netdev->features);
2084
2085 /* carrier off reporting is important to ethtool even BEFORE open */
2086 netif_carrier_off(netdev);
2087
2088 #ifdef CONFIG_IGB_DCA
2089 if (dca_add_requester(&pdev->dev) == 0) {
2090 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2091 dev_info(&pdev->dev, "DCA enabled\n");
2092 igb_setup_dca(adapter);
2093 }
2094
2095 #endif
2096 /* do hw tstamp init after resetting */
2097 igb_init_hw_timer(adapter);
2098
2099 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2100 /* print bus type/speed/width info */
2101 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2102 netdev->name,
2103 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2104 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2105 "unknown"),
2106 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2107 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2108 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2109 "unknown"),
2110 netdev->dev_addr);
2111
2112 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2113 if (ret_val)
2114 strcpy(part_str, "Unknown");
2115 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2116 dev_info(&pdev->dev,
2117 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2118 adapter->msix_entries ? "MSI-X" :
2119 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2120 adapter->num_rx_queues, adapter->num_tx_queues);
2121 switch (hw->mac.type) {
2122 case e1000_i350:
2123 igb_set_eee_i350(hw);
2124 break;
2125 default:
2126 break;
2127 }
2128 return 0;
2129
2130 err_register:
2131 igb_release_hw_control(adapter);
2132 err_eeprom:
2133 if (!igb_check_reset_block(hw))
2134 igb_reset_phy(hw);
2135
2136 if (hw->flash_address)
2137 iounmap(hw->flash_address);
2138 err_sw_init:
2139 igb_clear_interrupt_scheme(adapter);
2140 iounmap(hw->hw_addr);
2141 err_ioremap:
2142 free_netdev(netdev);
2143 err_alloc_etherdev:
2144 pci_release_selected_regions(pdev,
2145 pci_select_bars(pdev, IORESOURCE_MEM));
2146 err_pci_reg:
2147 err_dma:
2148 pci_disable_device(pdev);
2149 return err;
2150 }
2151
2152 /**
2153 * igb_remove - Device Removal Routine
2154 * @pdev: PCI device information struct
2155 *
2156 * igb_remove is called by the PCI subsystem to alert the driver
2157 * that it should release a PCI device. This could be caused by a
2158 * Hot-Plug event, or because the driver is going to be removed from
2159 * memory.
2160 **/
2161 static void __devexit igb_remove(struct pci_dev *pdev)
2162 {
2163 struct net_device *netdev = pci_get_drvdata(pdev);
2164 struct igb_adapter *adapter = netdev_priv(netdev);
2165 struct e1000_hw *hw = &adapter->hw;
2166
2167 /*
2168 * The watchdog timer may be rescheduled, so explicitly
2169 * disable watchdog from being rescheduled.
2170 */
2171 set_bit(__IGB_DOWN, &adapter->state);
2172 del_timer_sync(&adapter->watchdog_timer);
2173 del_timer_sync(&adapter->phy_info_timer);
2174
2175 cancel_work_sync(&adapter->reset_task);
2176 cancel_work_sync(&adapter->watchdog_task);
2177
2178 #ifdef CONFIG_IGB_DCA
2179 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2180 dev_info(&pdev->dev, "DCA disabled\n");
2181 dca_remove_requester(&pdev->dev);
2182 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2183 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2184 }
2185 #endif
2186
2187 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2188 * would have already happened in close and is redundant. */
2189 igb_release_hw_control(adapter);
2190
2191 unregister_netdev(netdev);
2192
2193 igb_clear_interrupt_scheme(adapter);
2194
2195 #ifdef CONFIG_PCI_IOV
2196 /* reclaim resources allocated to VFs */
2197 if (adapter->vf_data) {
2198 /* disable iov and allow time for transactions to clear */
2199 pci_disable_sriov(pdev);
2200 msleep(500);
2201
2202 kfree(adapter->vf_data);
2203 adapter->vf_data = NULL;
2204 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2205 wrfl();
2206 msleep(100);
2207 dev_info(&pdev->dev, "IOV Disabled\n");
2208 }
2209 #endif
2210
2211 iounmap(hw->hw_addr);
2212 if (hw->flash_address)
2213 iounmap(hw->flash_address);
2214 pci_release_selected_regions(pdev,
2215 pci_select_bars(pdev, IORESOURCE_MEM));
2216
2217 free_netdev(netdev);
2218
2219 pci_disable_pcie_error_reporting(pdev);
2220
2221 pci_disable_device(pdev);
2222 }
2223
2224 /**
2225 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2226 * @adapter: board private structure to initialize
2227 *
2228 * This function initializes the vf specific data storage and then attempts to
2229 * allocate the VFs. The ordering matters because it is much more
2230 * expensive time-wise to disable SR-IOV than it is to allocate and free
2231 * the memory for the VFs.
2232 **/
2233 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2234 {
2235 #ifdef CONFIG_PCI_IOV
2236 struct pci_dev *pdev = adapter->pdev;
2237
2238 if (adapter->vfs_allocated_count) {
2239 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2240 sizeof(struct vf_data_storage),
2241 GFP_KERNEL);
2242 /* if allocation failed then we do not support SR-IOV */
2243 if (!adapter->vf_data) {
2244 adapter->vfs_allocated_count = 0;
2245 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2246 "Data Storage\n");
2247 }
2248 }
2249
2250 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2251 kfree(adapter->vf_data);
2252 adapter->vf_data = NULL;
2253 #endif /* CONFIG_PCI_IOV */
2254 adapter->vfs_allocated_count = 0;
2255 #ifdef CONFIG_PCI_IOV
2256 } else {
2257 unsigned char mac_addr[ETH_ALEN];
2258 int i;
2259 dev_info(&pdev->dev, "%d vfs allocated\n",
2260 adapter->vfs_allocated_count);
2261 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2262 random_ether_addr(mac_addr);
2263 igb_set_vf_mac(adapter, i, mac_addr);
2264 }
2265 /* DMA Coalescing is not supported in IOV mode. */
2266 if (adapter->flags & IGB_FLAG_DMAC)
2267 adapter->flags &= ~IGB_FLAG_DMAC;
2268 }
2269 #endif /* CONFIG_PCI_IOV */
2270 }
2271
2272
2273 /**
2274 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2275 * @adapter: board private structure to initialize
2276 *
2277 * igb_init_hw_timer initializes the function pointer and values for the hw
2278 * timer found in hardware.
2279 **/
2280 static void igb_init_hw_timer(struct igb_adapter *adapter)
2281 {
2282 struct e1000_hw *hw = &adapter->hw;
2283
2284 switch (hw->mac.type) {
2285 case e1000_i350:
2286 case e1000_82580:
2287 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2288 adapter->cycles.read = igb_read_clock;
2289 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2290 adapter->cycles.mult = 1;
2291 /*
2292 * The 82580 timesync hardware advances the system timer by 8 ns every 8 ns
2293 * and the value cannot be shifted. Instead we need to shift
2294 * the registers to generate a 64bit timer value. As a result
2295 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2296 * 24 in order to generate a larger value for synchronization.
2297 */
2298 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2299 /* disable system timer temporarily by setting bit 31 */
2300 wr32(E1000_TSAUXC, 0x80000000);
2301 wrfl();
2302
2303 /* Set registers so that rollover occurs soon to test this. */
2304 wr32(E1000_SYSTIMR, 0x00000000);
2305 wr32(E1000_SYSTIML, 0x80000000);
2306 wr32(E1000_SYSTIMH, 0x000000FF);
2307 wrfl();
2308
2309 /* enable system timer by clearing bit 31 */
2310 wr32(E1000_TSAUXC, 0x0);
2311 wrfl();
2312
2313 timecounter_init(&adapter->clock,
2314 &adapter->cycles,
2315 ktime_to_ns(ktime_get_real()));
2316 /*
2317 * Synchronize our NIC clock against system wall clock. NIC
2318 * time stamp reading requires ~3us per sample, each sample
2319 * was pretty stable even under load => only require 10
2320 * samples for each offset comparison.
2321 */
2322 memset(&adapter->compare, 0, sizeof(adapter->compare));
2323 adapter->compare.source = &adapter->clock;
2324 adapter->compare.target = ktime_get_real;
2325 adapter->compare.num_samples = 10;
2326 timecompare_update(&adapter->compare, 0);
2327 break;
2328 case e1000_82576:
2329 /*
2330 * Initialize hardware timer: we keep it running just in case
2331 * that some program needs it later on.
2332 */
2333 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2334 adapter->cycles.read = igb_read_clock;
2335 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2336 adapter->cycles.mult = 1;
2337 /*
2338 * Scale the NIC clock cycle by a large factor so that
2339 * relatively small clock corrections can be added or
2340 * subtracted at each clock tick. The drawbacks of a large
2341 * factor are a) that the clock register overflows more quickly
2342 * (not such a big deal) and b) that the increment per tick has
2343 * to fit into 24 bits. As a result we need to use a shift of
2344 * 19 so we can fit a value of 16 into the TIMINCA register.
2345 */
2346 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2347 wr32(E1000_TIMINCA,
2348 (1 << E1000_TIMINCA_16NS_SHIFT) |
2349 (16 << IGB_82576_TSYNC_SHIFT));
2350
2351 /* Set registers so that rollover occurs soon to test this. */
2352 wr32(E1000_SYSTIML, 0x00000000);
2353 wr32(E1000_SYSTIMH, 0xFF800000);
2354 wrfl();
2355
2356 timecounter_init(&adapter->clock,
2357 &adapter->cycles,
2358 ktime_to_ns(ktime_get_real()));
2359 /*
2360 * Synchronize our NIC clock against system wall clock. NIC
2361 * time stamp reading requires ~3us per sample, each sample
2362 * was pretty stable even under load => only require 10
2363 * samples for each offset comparison.
2364 */
2365 memset(&adapter->compare, 0, sizeof(adapter->compare));
2366 adapter->compare.source = &adapter->clock;
2367 adapter->compare.target = ktime_get_real;
2368 adapter->compare.num_samples = 10;
2369 timecompare_update(&adapter->compare, 0);
2370 break;
2371 case e1000_82575:
2372 /* 82575 does not support timesync */
2373 default:
2374 break;
2375 }
2376
2377 }
2378
2379 /**
2380 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2381 * @adapter: board private structure to initialize
2382 *
2383 * igb_sw_init initializes the Adapter private data structure.
2384 * Fields are initialized based on PCI device information and
2385 * OS network device settings (MTU size).
2386 **/
2387 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2388 {
2389 struct e1000_hw *hw = &adapter->hw;
2390 struct net_device *netdev = adapter->netdev;
2391 struct pci_dev *pdev = adapter->pdev;
2392
2393 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2394
2395 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2396 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2397 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2398 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2399
2400 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2401 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
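/* the MTU covers the payload only, so the Ethernet header and FCS are
 * added back; ETH_ZLEN (60) + ETH_FCS_LEN (4) is the 64-byte minimum
 * Ethernet frame */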
2402
2403 spin_lock_init(&adapter->stats64_lock);
2404 #ifdef CONFIG_PCI_IOV
2405 switch (hw->mac.type) {
2406 case e1000_82576:
2407 case e1000_i350:
2408 if (max_vfs > 7) {
2409 dev_warn(&pdev->dev,
2410 "Maximum of 7 VFs per PF, using max\n");
2411 adapter->vfs_allocated_count = 7;
2412 } else
2413 adapter->vfs_allocated_count = max_vfs;
2414 break;
2415 default:
2416 break;
2417 }
2418 #endif /* CONFIG_PCI_IOV */
2419 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2420 /* i350 cannot do RSS and SR-IOV at the same time */
2421 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2422 adapter->rss_queues = 1;
2423
2424 /*
2425 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2426 * then we should combine the queues into a queue pair in order to
2427 * conserve interrupts due to limited supply
2428 */
2429 if ((adapter->rss_queues > 4) ||
2430 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2431 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2432
2433 /* This call may decrease the number of queues */
2434 if (igb_init_interrupt_scheme(adapter)) {
2435 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2436 return -ENOMEM;
2437 }
2438
2439 igb_probe_vfs(adapter);
2440
2441 /* Explicitly disable IRQ since the NIC can be in any state. */
2442 igb_irq_disable(adapter);
2443
2444 if (hw->mac.type == e1000_i350)
2445 adapter->flags &= ~IGB_FLAG_DMAC;
2446
2447 set_bit(__IGB_DOWN, &adapter->state);
2448 return 0;
2449 }
2450
2451 /**
2452 * igb_open - Called when a network interface is made active
2453 * @netdev: network interface device structure
2454 *
2455 * Returns 0 on success, negative value on failure
2456 *
2457 * The open entry point is called when a network interface is made
2458 * active by the system (IFF_UP). At this point all resources needed
2459 * for transmit and receive operations are allocated, the interrupt
2460 * handler is registered with the OS, the watchdog timer is started,
2461 * and the stack is notified that the interface is ready.
2462 **/
2463 static int igb_open(struct net_device *netdev)
2464 {
2465 struct igb_adapter *adapter = netdev_priv(netdev);
2466 struct e1000_hw *hw = &adapter->hw;
2467 int err;
2468 int i;
2469
2470 /* disallow open during test */
2471 if (test_bit(__IGB_TESTING, &adapter->state))
2472 return -EBUSY;
2473
2474 netif_carrier_off(netdev);
2475
2476 /* allocate transmit descriptors */
2477 err = igb_setup_all_tx_resources(adapter);
2478 if (err)
2479 goto err_setup_tx;
2480
2481 /* allocate receive descriptors */
2482 err = igb_setup_all_rx_resources(adapter);
2483 if (err)
2484 goto err_setup_rx;
2485
2486 igb_power_up_link(adapter);
2487
2488 /* before we allocate an interrupt, we must be ready to handle it.
2489 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2490 * as soon as we call pci_request_irq, so we have to setup our
2491 * clean_rx handler before we do so. */
2492 igb_configure(adapter);
2493
2494 err = igb_request_irq(adapter);
2495 if (err)
2496 goto err_req_irq;
2497
2498 /* From here on the code is the same as igb_up() */
2499 clear_bit(__IGB_DOWN, &adapter->state);
2500
2501 for (i = 0; i < adapter->num_q_vectors; i++) {
2502 struct igb_q_vector *q_vector = adapter->q_vector[i];
2503 napi_enable(&q_vector->napi);
2504 }
2505
2506 /* Clear any pending interrupts. */
2507 rd32(E1000_ICR);
2508
2509 igb_irq_enable(adapter);
2510
2511 /* notify VFs that reset has been completed */
2512 if (adapter->vfs_allocated_count) {
2513 u32 reg_data = rd32(E1000_CTRL_EXT);
2514 reg_data |= E1000_CTRL_EXT_PFRSTD;
2515 wr32(E1000_CTRL_EXT, reg_data);
2516 }
2517
2518 netif_tx_start_all_queues(netdev);
2519
2520 /* start the watchdog. */
2521 hw->mac.get_link_status = 1;
2522 schedule_work(&adapter->watchdog_task);
2523
2524 return 0;
2525
2526 err_req_irq:
2527 igb_release_hw_control(adapter);
2528 igb_power_down_link(adapter);
2529 igb_free_all_rx_resources(adapter);
2530 err_setup_rx:
2531 igb_free_all_tx_resources(adapter);
2532 err_setup_tx:
2533 igb_reset(adapter);
2534
2535 return err;
2536 }
2537
2538 /**
2539 * igb_close - Disables a network interface
2540 * @netdev: network interface device structure
2541 *
2542 * Returns 0, this is not allowed to fail
2543 *
2544 * The close entry point is called when an interface is de-activated
2545 * by the OS. The hardware is still under the driver's control, but
2546 * needs to be disabled. A global MAC reset is issued to stop the
2547 * hardware, and all transmit and receive resources are freed.
2548 **/
2549 static int igb_close(struct net_device *netdev)
2550 {
2551 struct igb_adapter *adapter = netdev_priv(netdev);
2552
2553 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2554 igb_down(adapter);
2555
2556 igb_free_irq(adapter);
2557
2558 igb_free_all_tx_resources(adapter);
2559 igb_free_all_rx_resources(adapter);
2560
2561 return 0;
2562 }
2563
2564 /**
2565 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2566 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2567 *
2568 * Return 0 on success, negative on failure
2569 **/
2570 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2571 {
2572 struct device *dev = tx_ring->dev;
2573 int size;
2574
2575 size = sizeof(struct igb_buffer) * tx_ring->count;
2576 tx_ring->buffer_info = vzalloc(size);
2577 if (!tx_ring->buffer_info)
2578 goto err;
2579
2580 /* round up to nearest 4K */
2581 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2582 tx_ring->size = ALIGN(tx_ring->size, 4096);
2583
2584 tx_ring->desc = dma_alloc_coherent(dev,
2585 tx_ring->size,
2586 &tx_ring->dma,
2587 GFP_KERNEL);
2588
2589 if (!tx_ring->desc)
2590 goto err;
2591
2592 tx_ring->next_to_use = 0;
2593 tx_ring->next_to_clean = 0;
2594 return 0;
2595
2596 err:
2597 vfree(tx_ring->buffer_info);
2598 dev_err(dev,
2599 "Unable to allocate memory for the transmit descriptor ring\n");
2600 return -ENOMEM;
2601 }
2602
2603 /**
2604 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2605 * (Descriptors) for all queues
2606 * @adapter: board private structure
2607 *
2608 * Return 0 on success, negative on failure
2609 **/
2610 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2611 {
2612 struct pci_dev *pdev = adapter->pdev;
2613 int i, err = 0;
2614
2615 for (i = 0; i < adapter->num_tx_queues; i++) {
2616 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2617 if (err) {
2618 dev_err(&pdev->dev,
2619 "Allocation for Tx Queue %u failed\n", i);
2620 for (i--; i >= 0; i--)
2621 igb_free_tx_resources(adapter->tx_ring[i]);
2622 break;
2623 }
2624 }
2625
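/* map every possible Tx queue index onto one of the allocated rings,
 * round-robin, so any queue selected by the stack resolves to a ring */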
2626 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2627 int r_idx = i % adapter->num_tx_queues;
2628 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2629 }
2630 return err;
2631 }
2632
2633 /**
2634 * igb_setup_tctl - configure the transmit control registers
2635 * @adapter: Board private structure
2636 **/
2637 void igb_setup_tctl(struct igb_adapter *adapter)
2638 {
2639 struct e1000_hw *hw = &adapter->hw;
2640 u32 tctl;
2641
2642 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2643 wr32(E1000_TXDCTL(0), 0);
2644
2645 /* Program the Transmit Control Register */
2646 tctl = rd32(E1000_TCTL);
2647 tctl &= ~E1000_TCTL_CT;
2648 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2649 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2650
2651 igb_config_collision_dist(hw);
2652
2653 /* Enable transmits */
2654 tctl |= E1000_TCTL_EN;
2655
2656 wr32(E1000_TCTL, tctl);
2657 }
2658
2659 /**
2660 * igb_configure_tx_ring - Configure transmit ring after Reset
2661 * @adapter: board private structure
2662 * @ring: tx ring to configure
2663 *
2664 * Configure a transmit ring after a reset.
2665 **/
2666 void igb_configure_tx_ring(struct igb_adapter *adapter,
2667 struct igb_ring *ring)
2668 {
2669 struct e1000_hw *hw = &adapter->hw;
2670 u32 txdctl;
2671 u64 tdba = ring->dma;
2672 int reg_idx = ring->reg_idx;
2673
2674 /* disable the queue */
2675 txdctl = rd32(E1000_TXDCTL(reg_idx));
2676 wr32(E1000_TXDCTL(reg_idx),
2677 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2678 wrfl();
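/* a short pause gives the just-disabled queue time to quiesce before
 * its base address, length and thresholds are rewritten below */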
2679 mdelay(10);
2680
2681 wr32(E1000_TDLEN(reg_idx),
2682 ring->count * sizeof(union e1000_adv_tx_desc));
2683 wr32(E1000_TDBAL(reg_idx),
2684 tdba & 0x00000000ffffffffULL);
2685 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2686
2687 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2688 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2689 writel(0, ring->head);
2690 writel(0, ring->tail);
2691
2692 txdctl |= IGB_TX_PTHRESH;
2693 txdctl |= IGB_TX_HTHRESH << 8;
2694 txdctl |= IGB_TX_WTHRESH << 16;
2695
2696 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2697 wr32(E1000_TXDCTL(reg_idx), txdctl);
2698 }
2699
2700 /**
2701 * igb_configure_tx - Configure transmit Unit after Reset
2702 * @adapter: board private structure
2703 *
2704 * Configure the Tx unit of the MAC after a reset.
2705 **/
2706 static void igb_configure_tx(struct igb_adapter *adapter)
2707 {
2708 int i;
2709
2710 for (i = 0; i < adapter->num_tx_queues; i++)
2711 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2712 }
2713
2714 /**
2715 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2716 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2717 *
2718 * Returns 0 on success, negative on failure
2719 **/
2720 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2721 {
2722 struct device *dev = rx_ring->dev;
2723 int size, desc_len;
2724
2725 size = sizeof(struct igb_buffer) * rx_ring->count;
2726 rx_ring->buffer_info = vzalloc(size);
2727 if (!rx_ring->buffer_info)
2728 goto err;
2729
2730 desc_len = sizeof(union e1000_adv_rx_desc);
2731
2732 /* Round up to nearest 4K */
2733 rx_ring->size = rx_ring->count * desc_len;
2734 rx_ring->size = ALIGN(rx_ring->size, 4096);
2735
2736 rx_ring->desc = dma_alloc_coherent(dev,
2737 rx_ring->size,
2738 &rx_ring->dma,
2739 GFP_KERNEL);
2740
2741 if (!rx_ring->desc)
2742 goto err;
2743
2744 rx_ring->next_to_clean = 0;
2745 rx_ring->next_to_use = 0;
2746
2747 return 0;
2748
2749 err:
2750 vfree(rx_ring->buffer_info);
2751 rx_ring->buffer_info = NULL;
2752 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2753 " ring\n");
2754 return -ENOMEM;
2755 }
2756
2757 /**
2758 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2759 * (Descriptors) for all queues
2760 * @adapter: board private structure
2761 *
2762 * Return 0 on success, negative on failure
2763 **/
2764 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2765 {
2766 struct pci_dev *pdev = adapter->pdev;
2767 int i, err = 0;
2768
2769 for (i = 0; i < adapter->num_rx_queues; i++) {
2770 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2771 if (err) {
2772 dev_err(&pdev->dev,
2773 "Allocation for Rx Queue %u failed\n", i);
2774 for (i--; i >= 0; i--)
2775 igb_free_rx_resources(adapter->rx_ring[i]);
2776 break;
2777 }
2778 }
2779
2780 return err;
2781 }
2782
2783 /**
2784 * igb_setup_mrqc - configure the multiple receive queue control registers
2785 * @adapter: Board private structure
2786 **/
2787 static void igb_setup_mrqc(struct igb_adapter *adapter)
2788 {
2789 struct e1000_hw *hw = &adapter->hw;
2790 u32 mrqc, rxcsum;
2791 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2792 union e1000_reta {
2793 u32 dword;
2794 u8 bytes[4];
2795 } reta;
2796 static const u8 rsshash[40] = {
2797 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2798 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2799 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2800 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2801
2802 /* Fill out hash function seeds */
2803 for (j = 0; j < 10; j++) {
2804 u32 rsskey = rsshash[(j * 4)];
2805 rsskey |= rsshash[(j * 4) + 1] << 8;
2806 rsskey |= rsshash[(j * 4) + 2] << 16;
2807 rsskey |= rsshash[(j * 4) + 3] << 24;
2808 array_wr32(E1000_RSSRK(0), j, rsskey);
2809 }
2810
2811 num_rx_queues = adapter->rss_queues;
2812
2813 if (adapter->vfs_allocated_count) {
2814 /* 82575 and 82576 support 2 RSS queues for VMDq */
2815 switch (hw->mac.type) {
2816 case e1000_i350:
2817 case e1000_82580:
2818 num_rx_queues = 1;
2819 shift = 0;
2820 break;
2821 case e1000_82576:
2822 shift = 3;
2823 num_rx_queues = 2;
2824 break;
2825 case e1000_82575:
2826 shift = 2;
2827 shift2 = 6;
2828 default:
2829 break;
2830 }
2831 } else {
2832 if (hw->mac.type == e1000_82575)
2833 shift = 6;
2834 }
2835
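/* the 128-entry redirection table is packed four bytes at a time: each
 * byte selects queue (j % num_rx_queues) shifted into position, and a
 * full dword is written out on every fourth iteration */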
2836 for (j = 0; j < (32 * 4); j++) {
2837 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2838 if (shift2)
2839 reta.bytes[j & 3] |= num_rx_queues << shift2;
2840 if ((j & 3) == 3)
2841 wr32(E1000_RETA(j >> 2), reta.dword);
2842 }
2843
2844 /*
2845 * Disable raw packet checksumming so that RSS hash is placed in
2846 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2847 * offloads as they are enabled by default
2848 */
2849 rxcsum = rd32(E1000_RXCSUM);
2850 rxcsum |= E1000_RXCSUM_PCSD;
2851
2852 if (adapter->hw.mac.type >= e1000_82576)
2853 /* Enable Receive Checksum Offload for SCTP */
2854 rxcsum |= E1000_RXCSUM_CRCOFL;
2855
2856 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2857 wr32(E1000_RXCSUM, rxcsum);
2858
2859 /* If VMDq is enabled then we set the appropriate mode for that, else
2860 * we default to RSS so that an RSS hash is calculated per packet even
2861 * if we are only using one queue */
2862 if (adapter->vfs_allocated_count) {
2863 if (hw->mac.type > e1000_82575) {
2864 /* Set the default pool for the PF's first queue */
2865 u32 vtctl = rd32(E1000_VT_CTL);
2866 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2867 E1000_VT_CTL_DISABLE_DEF_POOL);
2868 vtctl |= adapter->vfs_allocated_count <<
2869 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2870 wr32(E1000_VT_CTL, vtctl);
2871 }
2872 if (adapter->rss_queues > 1)
2873 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2874 else
2875 mrqc = E1000_MRQC_ENABLE_VMDQ;
2876 } else {
2877 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2878 }
2879 igb_vmm_control(adapter);
2880
2881 /*
2882 * Generate RSS hash based on TCP port numbers and/or
2883 * IPv4/v6 src and dst addresses since UDP cannot be
2884 * hashed reliably due to IP fragmentation
2885 */
2886 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2887 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2888 E1000_MRQC_RSS_FIELD_IPV6 |
2889 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2890 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2891
2892 wr32(E1000_MRQC, mrqc);
2893 }
2894
2895 /**
2896 * igb_setup_rctl - configure the receive control registers
2897 * @adapter: Board private structure
2898 **/
2899 void igb_setup_rctl(struct igb_adapter *adapter)
2900 {
2901 struct e1000_hw *hw = &adapter->hw;
2902 u32 rctl;
2903
2904 rctl = rd32(E1000_RCTL);
2905
2906 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2907 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2908
2909 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2910 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2911
2912 /*
2913 * enable stripping of CRC. It's unlikely this will break BMC
2914 * redirection as it did with e1000. Newer features require
2915 * that the HW strips the CRC.
2916 */
2917 rctl |= E1000_RCTL_SECRC;
2918
2919 /* disable store bad packets and clear size bits. */
2920 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2921
2922 /* enable LPE to prevent packets larger than max_frame_size */
2923 rctl |= E1000_RCTL_LPE;
2924
2925 /* disable queue 0 to prevent tail write w/o re-config */
2926 wr32(E1000_RXDCTL(0), 0);
2927
2928 /* Attention!!! For SR-IOV PF driver operations you must enable
2929 * queue drop for all VF and PF queues to prevent head of line blocking
2930 * if an un-trusted VF does not provide descriptors to hardware.
2931 */
2932 if (adapter->vfs_allocated_count) {
2933 /* set all queue drop enable bits */
2934 wr32(E1000_QDE, ALL_QUEUES);
2935 }
2936
2937 wr32(E1000_RCTL, rctl);
2938 }
2939
2940 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2941 int vfn)
2942 {
2943 struct e1000_hw *hw = &adapter->hw;
2944 u32 vmolr;
2945
2946 /* if it isn't the PF, check to see if VFs are enabled and
2947 * increase the size to support vlan tags */
2948 if (vfn < adapter->vfs_allocated_count &&
2949 adapter->vf_data[vfn].vlans_enabled)
2950 size += VLAN_TAG_SIZE;
2951
2952 vmolr = rd32(E1000_VMOLR(vfn));
2953 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2954 vmolr |= size | E1000_VMOLR_LPE;
2955 wr32(E1000_VMOLR(vfn), vmolr);
2956
2957 return 0;
2958 }
2959
2960 /**
2961 * igb_rlpml_set - set maximum receive packet size
2962 * @adapter: board private structure
2963 *
2964 * Configure maximum receivable packet size.
2965 **/
2966 static void igb_rlpml_set(struct igb_adapter *adapter)
2967 {
2968 u32 max_frame_size;
2969 struct e1000_hw *hw = &adapter->hw;
2970 u16 pf_id = adapter->vfs_allocated_count;
2971
2972 max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2973
2974 /* if vfs are enabled we set RLPML to the largest possible request
2975 * size and set the VMOLR RLPML to the size we need */
2976 if (pf_id) {
2977 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2978 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2979 }
2980
2981 wr32(E1000_RLPML, max_frame_size);
2982 }
2983
2984 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2985 int vfn, bool aupe)
2986 {
2987 struct e1000_hw *hw = &adapter->hw;
2988 u32 vmolr;
2989
2990 /*
2991 * This register exists only on 82576 and newer so if we are older then
2992 * we should exit and do nothing
2993 */
2994 if (hw->mac.type < e1000_82576)
2995 return;
2996
2997 vmolr = rd32(E1000_VMOLR(vfn));
2998 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2999 if (aupe)
3000 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3001 else
3002 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3003
3004 /* clear all bits that might not be set */
3005 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3006
3007 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3008 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3009 /*
3010 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3011 * multicast packets
3012 */
3013 if (vfn <= adapter->vfs_allocated_count)
3014 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3015
3016 wr32(E1000_VMOLR(vfn), vmolr);
3017 }
3018
3019 /**
3020 * igb_configure_rx_ring - Configure a receive ring after Reset
3021 * @adapter: board private structure
3022 * @ring: receive ring to be configured
3023 *
3024 * Configure the Rx unit of the MAC after a reset.
3025 **/
3026 void igb_configure_rx_ring(struct igb_adapter *adapter,
3027 struct igb_ring *ring)
3028 {
3029 struct e1000_hw *hw = &adapter->hw;
3030 u64 rdba = ring->dma;
3031 int reg_idx = ring->reg_idx;
3032 u32 srrctl, rxdctl;
3033
3034 /* disable the queue */
3035 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3036 wr32(E1000_RXDCTL(reg_idx),
3037 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3038
3039 /* Set DMA base address registers */
3040 wr32(E1000_RDBAL(reg_idx),
3041 rdba & 0x00000000ffffffffULL);
3042 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3043 wr32(E1000_RDLEN(reg_idx),
3044 ring->count * sizeof(union e1000_adv_rx_desc));
3045
3046 /* initialize head and tail */
3047 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3048 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3049 writel(0, ring->head);
3050 writel(0, ring->tail);
3051
3052 /* set descriptor configuration */
3053 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3054 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3055 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
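/* for header split, size the packet buffer to half a page but no
 * larger than 16KB */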
3056 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3057 srrctl |= IGB_RXBUFFER_16384 >>
3058 E1000_SRRCTL_BSIZEPKT_SHIFT;
3059 #else
3060 srrctl |= (PAGE_SIZE / 2) >>
3061 E1000_SRRCTL_BSIZEPKT_SHIFT;
3062 #endif
3063 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3064 } else {
3065 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3066 E1000_SRRCTL_BSIZEPKT_SHIFT;
3067 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3068 }
3069 if (hw->mac.type == e1000_82580)
3070 srrctl |= E1000_SRRCTL_TIMESTAMP;
3071 /* Only set Drop Enable if we are supporting multiple queues */
3072 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3073 srrctl |= E1000_SRRCTL_DROP_EN;
3074
3075 wr32(E1000_SRRCTL(reg_idx), srrctl);
3076
3077 /* set filtering for VMDQ pools */
3078 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3079
3080 /* enable receive descriptor fetching */
3081 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3082 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3083 rxdctl &= 0xFFF00000;
3084 rxdctl |= IGB_RX_PTHRESH;
3085 rxdctl |= IGB_RX_HTHRESH << 8;
3086 rxdctl |= IGB_RX_WTHRESH << 16;
3087 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3088 }
3089
3090 /**
3091 * igb_configure_rx - Configure receive Unit after Reset
3092 * @adapter: board private structure
3093 *
3094 * Configure the Rx unit of the MAC after a reset.
3095 **/
3096 static void igb_configure_rx(struct igb_adapter *adapter)
3097 {
3098 int i;
3099
3100 /* set UTA to appropriate mode */
3101 igb_set_uta(adapter);
3102
3103 /* set the correct pool for the PF default MAC address in entry 0 */
3104 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3105 adapter->vfs_allocated_count);
3106
3107 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3108 * the Base and Length of the Rx Descriptor Ring */
3109 for (i = 0; i < adapter->num_rx_queues; i++)
3110 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3111 }
3112
3113 /**
3114 * igb_free_tx_resources - Free Tx Resources per Queue
3115 * @tx_ring: Tx descriptor ring for a specific queue
3116 *
3117 * Free all transmit software resources
3118 **/
3119 void igb_free_tx_resources(struct igb_ring *tx_ring)
3120 {
3121 igb_clean_tx_ring(tx_ring);
3122
3123 vfree(tx_ring->buffer_info);
3124 tx_ring->buffer_info = NULL;
3125
3126 /* if not set, then don't free */
3127 if (!tx_ring->desc)
3128 return;
3129
3130 dma_free_coherent(tx_ring->dev, tx_ring->size,
3131 tx_ring->desc, tx_ring->dma);
3132
3133 tx_ring->desc = NULL;
3134 }
3135
3136 /**
3137 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3138 * @adapter: board private structure
3139 *
3140 * Free all transmit software resources
3141 **/
3142 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3143 {
3144 int i;
3145
3146 for (i = 0; i < adapter->num_tx_queues; i++)
3147 igb_free_tx_resources(adapter->tx_ring[i]);
3148 }
3149
3150 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3151 struct igb_buffer *buffer_info)
3152 {
3153 if (buffer_info->dma) {
3154 if (buffer_info->mapped_as_page)
3155 dma_unmap_page(tx_ring->dev,
3156 buffer_info->dma,
3157 buffer_info->length,
3158 DMA_TO_DEVICE);
3159 else
3160 dma_unmap_single(tx_ring->dev,
3161 buffer_info->dma,
3162 buffer_info->length,
3163 DMA_TO_DEVICE);
3164 buffer_info->dma = 0;
3165 }
3166 if (buffer_info->skb) {
3167 dev_kfree_skb_any(buffer_info->skb);
3168 buffer_info->skb = NULL;
3169 }
3170 buffer_info->time_stamp = 0;
3171 buffer_info->length = 0;
3172 buffer_info->next_to_watch = 0;
3173 buffer_info->mapped_as_page = false;
3174 }
3175
3176 /**
3177 * igb_clean_tx_ring - Free Tx Buffers
3178 * @tx_ring: ring to be cleaned
3179 **/
3180 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3181 {
3182 struct igb_buffer *buffer_info;
3183 unsigned long size;
3184 unsigned int i;
3185
3186 if (!tx_ring->buffer_info)
3187 return;
3188 /* Free all the Tx ring sk_buffs */
3189
3190 for (i = 0; i < tx_ring->count; i++) {
3191 buffer_info = &tx_ring->buffer_info[i];
3192 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3193 }
3194
3195 size = sizeof(struct igb_buffer) * tx_ring->count;
3196 memset(tx_ring->buffer_info, 0, size);
3197
3198 /* Zero out the descriptor ring */
3199 memset(tx_ring->desc, 0, tx_ring->size);
3200
3201 tx_ring->next_to_use = 0;
3202 tx_ring->next_to_clean = 0;
3203 }
3204
3205 /**
3206 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3207 * @adapter: board private structure
3208 **/
3209 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3210 {
3211 int i;
3212
3213 for (i = 0; i < adapter->num_tx_queues; i++)
3214 igb_clean_tx_ring(adapter->tx_ring[i]);
3215 }
3216
3217 /**
3218 * igb_free_rx_resources - Free Rx Resources
3219 * @rx_ring: ring to clean the resources from
3220 *
3221 * Free all receive software resources
3222 **/
3223 void igb_free_rx_resources(struct igb_ring *rx_ring)
3224 {
3225 igb_clean_rx_ring(rx_ring);
3226
3227 vfree(rx_ring->buffer_info);
3228 rx_ring->buffer_info = NULL;
3229
3230 /* if not set, then don't free */
3231 if (!rx_ring->desc)
3232 return;
3233
3234 dma_free_coherent(rx_ring->dev, rx_ring->size,
3235 rx_ring->desc, rx_ring->dma);
3236
3237 rx_ring->desc = NULL;
3238 }
3239
3240 /**
3241 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3242 * @adapter: board private structure
3243 *
3244 * Free all receive software resources
3245 **/
3246 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3247 {
3248 int i;
3249
3250 for (i = 0; i < adapter->num_rx_queues; i++)
3251 igb_free_rx_resources(adapter->rx_ring[i]);
3252 }
3253
3254 /**
3255 * igb_clean_rx_ring - Free Rx Buffers per Queue
3256 * @rx_ring: ring to free buffers from
3257 **/
3258 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3259 {
3260 struct igb_buffer *buffer_info;
3261 unsigned long size;
3262 unsigned int i;
3263
3264 if (!rx_ring->buffer_info)
3265 return;
3266
3267 /* Free all the Rx ring sk_buffs */
3268 for (i = 0; i < rx_ring->count; i++) {
3269 buffer_info = &rx_ring->buffer_info[i];
3270 if (buffer_info->dma) {
3271 dma_unmap_single(rx_ring->dev,
3272 buffer_info->dma,
3273 rx_ring->rx_buffer_len,
3274 DMA_FROM_DEVICE);
3275 buffer_info->dma = 0;
3276 }
3277
3278 if (buffer_info->skb) {
3279 dev_kfree_skb(buffer_info->skb);
3280 buffer_info->skb = NULL;
3281 }
3282 if (buffer_info->page_dma) {
3283 dma_unmap_page(rx_ring->dev,
3284 buffer_info->page_dma,
3285 PAGE_SIZE / 2,
3286 DMA_FROM_DEVICE);
3287 buffer_info->page_dma = 0;
3288 }
3289 if (buffer_info->page) {
3290 put_page(buffer_info->page);
3291 buffer_info->page = NULL;
3292 buffer_info->page_offset = 0;
3293 }
3294 }
3295
3296 size = sizeof(struct igb_buffer) * rx_ring->count;
3297 memset(rx_ring->buffer_info, 0, size);
3298
3299 /* Zero out the descriptor ring */
3300 memset(rx_ring->desc, 0, rx_ring->size);
3301
3302 rx_ring->next_to_clean = 0;
3303 rx_ring->next_to_use = 0;
3304 }
3305
3306 /**
3307 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3308 * @adapter: board private structure
3309 **/
3310 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3311 {
3312 int i;
3313
3314 for (i = 0; i < adapter->num_rx_queues; i++)
3315 igb_clean_rx_ring(adapter->rx_ring[i]);
3316 }
3317
3318 /**
3319 * igb_set_mac - Change the Ethernet Address of the NIC
3320 * @netdev: network interface device structure
3321 * @p: pointer to an address structure
3322 *
3323 * Returns 0 on success, negative on failure
3324 **/
3325 static int igb_set_mac(struct net_device *netdev, void *p)
3326 {
3327 struct igb_adapter *adapter = netdev_priv(netdev);
3328 struct e1000_hw *hw = &adapter->hw;
3329 struct sockaddr *addr = p;
3330
3331 if (!is_valid_ether_addr(addr->sa_data))
3332 return -EADDRNOTAVAIL;
3333
3334 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3335 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3336
3337 /* set the correct pool for the new PF MAC address in entry 0 */
3338 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3339 adapter->vfs_allocated_count);
3340
3341 return 0;
3342 }
3343
3344 /**
3345 * igb_write_mc_addr_list - write multicast addresses to MTA
3346 * @netdev: network interface device structure
3347 *
3348 * Writes multicast address list to the MTA hash table.
3349 * Returns: -ENOMEM on failure
3350 * 0 on no addresses written
3351 * X on writing X addresses to MTA
3352 **/
3353 static int igb_write_mc_addr_list(struct net_device *netdev)
3354 {
3355 struct igb_adapter *adapter = netdev_priv(netdev);
3356 struct e1000_hw *hw = &adapter->hw;
3357 struct netdev_hw_addr *ha;
3358 u8 *mta_list;
3359 int i;
3360
3361 if (netdev_mc_empty(netdev)) {
3362 /* nothing to program, so clear mc list */
3363 igb_update_mc_addr_list(hw, NULL, 0);
3364 igb_restore_vf_multicasts(adapter);
3365 return 0;
3366 }
3367
3368 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3369 if (!mta_list)
3370 return -ENOMEM;
3371
3372 /* The shared function expects a packed array of only addresses. */
3373 i = 0;
3374 netdev_for_each_mc_addr(ha, netdev)
3375 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3376
3377 igb_update_mc_addr_list(hw, mta_list, i);
3378 kfree(mta_list);
3379
3380 return netdev_mc_count(netdev);
3381 }
3382
3383 /**
3384 * igb_write_uc_addr_list - write unicast addresses to RAR table
3385 * @netdev: network interface device structure
3386 *
3387 * Writes unicast address list to the RAR table.
3388 * Returns: -ENOMEM on failure/insufficient address space
3389 * 0 on no addresses written
3390 * X on writing X addresses to the RAR table
3391 **/
3392 static int igb_write_uc_addr_list(struct net_device *netdev)
3393 {
3394 struct igb_adapter *adapter = netdev_priv(netdev);
3395 struct e1000_hw *hw = &adapter->hw;
3396 unsigned int vfn = adapter->vfs_allocated_count;
3397 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
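/* entry 0 holds the PF default MAC and the top vfn entries hold the
 * per-VF MACs, so only the remaining RAR slots are usable here */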
3398 int count = 0;
3399
3400 /* return ENOMEM indicating insufficient memory for addresses */
3401 if (netdev_uc_count(netdev) > rar_entries)
3402 return -ENOMEM;
3403
3404 if (!netdev_uc_empty(netdev) && rar_entries) {
3405 struct netdev_hw_addr *ha;
3406
3407 netdev_for_each_uc_addr(ha, netdev) {
3408 if (!rar_entries)
3409 break;
3410 igb_rar_set_qsel(adapter, ha->addr,
3411 rar_entries--,
3412 vfn);
3413 count++;
3414 }
3415 }
3416 /* write the addresses in reverse order to avoid write combining */
3417 for (; rar_entries > 0 ; rar_entries--) {
3418 wr32(E1000_RAH(rar_entries), 0);
3419 wr32(E1000_RAL(rar_entries), 0);
3420 }
3421 wrfl();
3422
3423 return count;
3424 }
3425
3426 /**
3427 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3428 * @netdev: network interface device structure
3429 *
3430 * The set_rx_mode entry point is called whenever the unicast or multicast
3431 * address lists or the network interface flags are updated. This routine is
3432 * responsible for configuring the hardware for proper unicast, multicast,
3433 * promiscuous mode, and all-multi behavior.
3434 **/
3435 static void igb_set_rx_mode(struct net_device *netdev)
3436 {
3437 struct igb_adapter *adapter = netdev_priv(netdev);
3438 struct e1000_hw *hw = &adapter->hw;
3439 unsigned int vfn = adapter->vfs_allocated_count;
3440 u32 rctl, vmolr = 0;
3441 int count;
3442
3443 /* Check for Promiscuous and All Multicast modes */
3444 rctl = rd32(E1000_RCTL);
3445
3446 /* clear the affected bits */
3447 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3448
3449 if (netdev->flags & IFF_PROMISC) {
3450 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3451 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3452 } else {
3453 if (netdev->flags & IFF_ALLMULTI) {
3454 rctl |= E1000_RCTL_MPE;
3455 vmolr |= E1000_VMOLR_MPME;
3456 } else {
3457 /*
3458 * Write addresses to the MTA, if the attempt fails
3459 * then we should just turn on promiscuous mode so
3460 * that we can at least receive multicast traffic
3461 */
3462 count = igb_write_mc_addr_list(netdev);
3463 if (count < 0) {
3464 rctl |= E1000_RCTL_MPE;
3465 vmolr |= E1000_VMOLR_MPME;
3466 } else if (count) {
3467 vmolr |= E1000_VMOLR_ROMPE;
3468 }
3469 }
3470 /*
3471 * Write addresses to available RAR registers, if there is not
3472 * sufficient space to store all the addresses then enable
3473 * unicast promiscuous mode
3474 */
3475 count = igb_write_uc_addr_list(netdev);
3476 if (count < 0) {
3477 rctl |= E1000_RCTL_UPE;
3478 vmolr |= E1000_VMOLR_ROPE;
3479 }
3480 rctl |= E1000_RCTL_VFE;
3481 }
3482 wr32(E1000_RCTL, rctl);
3483
3484 /*
3485 * In order to support SR-IOV and eventually VMDq it is necessary to set
3486 * the VMOLR to enable the appropriate modes. Without this workaround
3487 * we will have issues with VLAN tag stripping not being done for frames
3488 * that are only arriving because we are the default pool
3489 */
3490 if (hw->mac.type < e1000_82576)
3491 return;
3492
3493 vmolr |= rd32(E1000_VMOLR(vfn)) &
3494 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3495 wr32(E1000_VMOLR(vfn), vmolr);
3496 igb_restore_vf_multicasts(adapter);
3497 }
3498
3499 static void igb_check_wvbr(struct igb_adapter *adapter)
3500 {
3501 struct e1000_hw *hw = &adapter->hw;
3502 u32 wvbr = 0;
3503
3504 switch (hw->mac.type) {
3505 case e1000_82576:
3506 case e1000_i350:
3507 if (!(wvbr = rd32(E1000_WVBR)))
3508 return;
3509 break;
3510 default:
3511 break;
3512 }
3513
3514 adapter->wvbr |= wvbr;
3515 }
3516
3517 #define IGB_STAGGERED_QUEUE_OFFSET 8
3518
3519 static void igb_spoof_check(struct igb_adapter *adapter)
3520 {
3521 int j;
3522
3523 if (!adapter->wvbr)
3524 return;
3525
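/* WVBR reports one spoof-event bit per queue; with the queues
 * staggered, VF j owns bit j and bit (j + IGB_STAGGERED_QUEUE_OFFSET) */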
3526 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3527 if (adapter->wvbr & (1 << j) ||
3528 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3529 dev_warn(&adapter->pdev->dev,
3530 "Spoof event(s) detected on VF %d\n", j);
3531 adapter->wvbr &=
3532 ~((1 << j) |
3533 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3534 }
3535 }
3536 }
3537
3538 /* Need to wait a few seconds after link up to get diagnostic information from
3539 * the phy */
3540 static void igb_update_phy_info(unsigned long data)
3541 {
3542 struct igb_adapter *adapter = (struct igb_adapter *) data;
3543 igb_get_phy_info(&adapter->hw);
3544 }
3545
3546 /**
3547 * igb_has_link - check shared code for link and determine up/down
3548 * @adapter: pointer to driver private info
3549 **/
3550 bool igb_has_link(struct igb_adapter *adapter)
3551 {
3552 struct e1000_hw *hw = &adapter->hw;
3553 bool link_active = false;
3554 s32 ret_val = 0;
3555
3556 /* get_link_status is set on LSC (link status) interrupt or
3557 * rx sequence error interrupt. get_link_status will stay
3558 * false until the e1000_check_for_link establishes link
3559 * for copper adapters ONLY
3560 */
3561 switch (hw->phy.media_type) {
3562 case e1000_media_type_copper:
3563 if (hw->mac.get_link_status) {
3564 ret_val = hw->mac.ops.check_for_link(hw);
3565 link_active = !hw->mac.get_link_status;
3566 } else {
3567 link_active = true;
3568 }
3569 break;
3570 case e1000_media_type_internal_serdes:
3571 ret_val = hw->mac.ops.check_for_link(hw);
3572 link_active = hw->mac.serdes_has_link;
3573 break;
3574 default:
3575 case e1000_media_type_unknown:
3576 break;
3577 }
3578
3579 return link_active;
3580 }
3581
3582 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3583 {
3584 bool ret = false;
3585 u32 ctrl_ext, thstat;
3586
3587 /* check for thermal sensor event on i350, copper only */
3588 if (hw->mac.type == e1000_i350) {
3589 thstat = rd32(E1000_THSTAT);
3590 ctrl_ext = rd32(E1000_CTRL_EXT);
3591
3592 if ((hw->phy.media_type == e1000_media_type_copper) &&
3593 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3594 ret = !!(thstat & event);
3595 }
3596 }
3597
3598 return ret;
3599 }
3600
3601 /**
3602 * igb_watchdog - Timer Call-back
3603 * @data: pointer to adapter cast into an unsigned long
3604 **/
3605 static void igb_watchdog(unsigned long data)
3606 {
3607 struct igb_adapter *adapter = (struct igb_adapter *)data;
3608 /* Do the rest outside of interrupt context */
3609 schedule_work(&adapter->watchdog_task);
3610 }
3611
3612 static void igb_watchdog_task(struct work_struct *work)
3613 {
3614 struct igb_adapter *adapter = container_of(work,
3615 struct igb_adapter,
3616 watchdog_task);
3617 struct e1000_hw *hw = &adapter->hw;
3618 struct net_device *netdev = adapter->netdev;
3619 u32 link;
3620 int i;
3621
3622 link = igb_has_link(adapter);
3623 if (link) {
3624 if (!netif_carrier_ok(netdev)) {
3625 u32 ctrl;
3626 hw->mac.ops.get_speed_and_duplex(hw,
3627 &adapter->link_speed,
3628 &adapter->link_duplex);
3629
3630 ctrl = rd32(E1000_CTRL);
3631 /* Link status message must follow this format */
3632 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3633 "Flow Control: %s\n",
3634 netdev->name,
3635 adapter->link_speed,
3636 adapter->link_duplex == FULL_DUPLEX ?
3637 "Full Duplex" : "Half Duplex",
3638 ((ctrl & E1000_CTRL_TFCE) &&
3639 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3640 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3641 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3642
3643 /* check for thermal sensor event */
3644 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3645 printk(KERN_INFO "igb: %s The network adapter "
3646 "link speed was downshifted "
3647 "because it overheated.\n",
3648 netdev->name);
3649 }
3650
3651 /* adjust timeout factor according to speed/duplex */
3652 adapter->tx_timeout_factor = 1;
3653 switch (adapter->link_speed) {
3654 case SPEED_10:
3655 adapter->tx_timeout_factor = 14;
3656 break;
3657 case SPEED_100:
3658 /* maybe add some timeout factor ? */
3659 break;
3660 }
3661
3662 netif_carrier_on(netdev);
3663
3664 igb_ping_all_vfs(adapter);
3665 igb_check_vf_rate_limit(adapter);
3666
3667 /* link state has changed, schedule phy info update */
3668 if (!test_bit(__IGB_DOWN, &adapter->state))
3669 mod_timer(&adapter->phy_info_timer,
3670 round_jiffies(jiffies + 2 * HZ));
3671 }
3672 } else {
3673 if (netif_carrier_ok(netdev)) {
3674 adapter->link_speed = 0;
3675 adapter->link_duplex = 0;
3676
3677 /* check for thermal sensor event */
3678 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3679 printk(KERN_ERR "igb: %s The network adapter "
3680 "was stopped because it "
3681 "overheated.\n",
3682 netdev->name);
3683 }
3684
3685 /* Link status message must follow this format */
3686 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3687 netdev->name);
3688 netif_carrier_off(netdev);
3689
3690 igb_ping_all_vfs(adapter);
3691
3692 /* link state has changed, schedule phy info update */
3693 if (!test_bit(__IGB_DOWN, &adapter->state))
3694 mod_timer(&adapter->phy_info_timer,
3695 round_jiffies(jiffies + 2 * HZ));
3696 }
3697 }
3698
3699 spin_lock(&adapter->stats64_lock);
3700 igb_update_stats(adapter, &adapter->stats64);
3701 spin_unlock(&adapter->stats64_lock);
3702
3703 for (i = 0; i < adapter->num_tx_queues; i++) {
3704 struct igb_ring *tx_ring = adapter->tx_ring[i];
3705 if (!netif_carrier_ok(netdev)) {
3706 /* We've lost link, so the controller stops DMA,
3707 * but we've got queued Tx work that's never going
3708 * to get done, so reset controller to flush Tx.
3709 * (Do the reset outside of interrupt context). */
3710 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3711 adapter->tx_timeout_count++;
3712 schedule_work(&adapter->reset_task);
3713 /* return immediately since reset is imminent */
3714 return;
3715 }
3716 }
3717
3718 /* Force detection of hung controller every watchdog period */
3719 tx_ring->detect_tx_hung = true;
3720 }
3721
3722 /* Cause software interrupt to ensure rx ring is cleaned */
3723 if (adapter->msix_entries) {
3724 u32 eics = 0;
3725 for (i = 0; i < adapter->num_q_vectors; i++) {
3726 struct igb_q_vector *q_vector = adapter->q_vector[i];
3727 eics |= q_vector->eims_value;
3728 }
3729 wr32(E1000_EICS, eics);
3730 } else {
3731 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3732 }
3733
3734 igb_spoof_check(adapter);
3735
3736 /* Reset the timer */
3737 if (!test_bit(__IGB_DOWN, &adapter->state))
3738 mod_timer(&adapter->watchdog_timer,
3739 round_jiffies(jiffies + 2 * HZ));
3740 }
3741
3742 enum latency_range {
3743 lowest_latency = 0,
3744 low_latency = 1,
3745 bulk_latency = 2,
3746 latency_invalid = 255
3747 };
3748
3749 /**
3750 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3751 *
3752 * Stores a new ITR value based strictly on packet size. This
3753 * algorithm is less sophisticated than that used in igb_update_itr,
3754 * due to the difficulty of synchronizing statistics across multiple
3755 * receive rings. The divisors and thresholds used by this function
3756 * were determined based on theoretical maximum wire speed and testing
3757 * data, in order to minimize response time while increasing bulk
3758 * throughput.
3759 * This functionality is controlled by the InterruptThrottleRate module
3760 * parameter (see igb_param.c)
3761 * NOTE: This function is called only when operating in a multiqueue
3762 * receive environment.
3763 * @q_vector: pointer to q_vector
3764 **/
3765 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3766 {
3767 int new_val = q_vector->itr_val;
3768 int avg_wire_size = 0;
3769 struct igb_adapter *adapter = q_vector->adapter;
3770 struct igb_ring *ring;
3771 unsigned int packets;
3772
3773 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3774 * ints/sec - ITR timer value of 120 ticks.
3775 */
3776 if (adapter->link_speed != SPEED_1000) {
3777 new_val = 976;
3778 goto set_itr_val;
3779 }
3780
3781 ring = q_vector->rx_ring;
3782 if (ring) {
3783 packets = ACCESS_ONCE(ring->total_packets);
3784
3785 if (packets)
3786 avg_wire_size = ring->total_bytes / packets;
3787 }
3788
3789 ring = q_vector->tx_ring;
3790 if (ring) {
3791 packets = ACCESS_ONCE(ring->total_packets);
3792
3793 if (packets)
3794 avg_wire_size = max_t(u32, avg_wire_size,
3795 ring->total_bytes / packets);
3796 }
3797
3798 /* if avg_wire_size isn't set no work was done */
3799 if (!avg_wire_size)
3800 goto clear_counts;
3801
3802 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3803 avg_wire_size += 24;
3804
3805 /* Don't starve jumbo frames */
3806 avg_wire_size = min(avg_wire_size, 3000);
3807
3808 /* Give a little boost to mid-size frames */
3809 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3810 new_val = avg_wire_size / 3;
3811 else
3812 new_val = avg_wire_size / 2;
3813
3814 /* when in itr mode 3 do not exceed 20K ints/sec */
3815 if (adapter->rx_itr_setting == 3 && new_val < 196)
3816 new_val = 196;
3817
3818 set_itr_val:
3819 if (new_val != q_vector->itr_val) {
3820 q_vector->itr_val = new_val;
3821 q_vector->set_itr = 1;
3822 }
3823 clear_counts:
3824 if (q_vector->rx_ring) {
3825 q_vector->rx_ring->total_bytes = 0;
3826 q_vector->rx_ring->total_packets = 0;
3827 }
3828 if (q_vector->tx_ring) {
3829 q_vector->tx_ring->total_bytes = 0;
3830 q_vector->tx_ring->total_packets = 0;
3831 }
3832 }
3833
3834 /**
3835 * igb_update_itr - update the dynamic ITR value based on statistics
3836 * Stores a new ITR value based on packets and byte
3837 * counts during the last interrupt. The advantage of per interrupt
3838 * computation is faster updates and more accurate ITR for the current
3839 * traffic pattern. Constants in this function were computed
3840 * based on theoretical maximum wire speed and thresholds were set based
3841 * on testing data as well as attempting to minimize response time
3842 * while increasing bulk throughput.
3843 * this functionality is controlled by the InterruptThrottleRate module
3844 * parameter (see igb_param.c)
3845 * NOTE: These calculations are only valid when operating in a single-
3846 * queue environment.
3847 * @adapter: pointer to adapter
3848 * @itr_setting: current q_vector->itr_val
3849 * @packets: the number of packets during this measurement interval
3850 * @bytes: the number of bytes during this measurement interval
3851 **/
3852 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3853 int packets, int bytes)
3854 {
3855 unsigned int retval = itr_setting;
3856
3857 if (packets == 0)
3858 goto update_itr_done;
3859
3860 switch (itr_setting) {
3861 case lowest_latency:
3862 /* handle TSO and jumbo frames */
3863 if (bytes/packets > 8000)
3864 retval = bulk_latency;
3865 else if ((packets < 5) && (bytes > 512))
3866 retval = low_latency;
3867 break;
3868 case low_latency: /* 50 usec aka 20000 ints/s */
3869 if (bytes > 10000) {
3870 /* this if handles the TSO accounting */
3871 if (bytes/packets > 8000) {
3872 retval = bulk_latency;
3873 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3874 retval = bulk_latency;
3875 } else if ((packets > 35)) {
3876 retval = lowest_latency;
3877 }
3878 } else if (bytes/packets > 2000) {
3879 retval = bulk_latency;
3880 } else if (packets <= 2 && bytes < 512) {
3881 retval = lowest_latency;
3882 }
3883 break;
3884 case bulk_latency: /* 250 usec aka 4000 ints/s */
3885 if (bytes > 25000) {
3886 if (packets > 35)
3887 retval = low_latency;
3888 } else if (bytes < 1500) {
3889 retval = low_latency;
3890 }
3891 break;
3892 }
3893
3894 update_itr_done:
3895 return retval;
3896 }
3897
3898 static void igb_set_itr(struct igb_adapter *adapter)
3899 {
3900 struct igb_q_vector *q_vector = adapter->q_vector[0];
3901 u16 current_itr;
3902 u32 new_itr = q_vector->itr_val;
3903
3904 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3905 if (adapter->link_speed != SPEED_1000) {
3906 current_itr = 0;
3907 new_itr = 4000;
3908 goto set_itr_now;
3909 }
3910
3911 adapter->rx_itr = igb_update_itr(adapter,
3912 adapter->rx_itr,
3913 q_vector->rx_ring->total_packets,
3914 q_vector->rx_ring->total_bytes);
3915
3916 adapter->tx_itr = igb_update_itr(adapter,
3917 adapter->tx_itr,
3918 q_vector->tx_ring->total_packets,
3919 q_vector->tx_ring->total_bytes);
3920 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3921
3922 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3923 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3924 current_itr = low_latency;
3925
3926 switch (current_itr) {
3927 /* counts and packets in update_itr are dependent on these numbers */
3928 case lowest_latency:
3929 new_itr = 56; /* aka 70,000 ints/sec */
3930 break;
3931 case low_latency:
3932 new_itr = 196; /* aka 20,000 ints/sec */
3933 break;
3934 case bulk_latency:
3935 new_itr = 980; /* aka 4,000 ints/sec */
3936 break;
3937 default:
3938 break;
3939 }
3940
3941 set_itr_now:
3942 q_vector->rx_ring->total_bytes = 0;
3943 q_vector->rx_ring->total_packets = 0;
3944 q_vector->tx_ring->total_bytes = 0;
3945 q_vector->tx_ring->total_packets = 0;
3946
3947 if (new_itr != q_vector->itr_val) {
3948 /* this attempts to bias the interrupt rate towards Bulk
3949 * by adding intermediate steps when interrupt rate is
3950 * increasing */
3951 new_itr = new_itr > q_vector->itr_val ?
3952 max((new_itr * q_vector->itr_val) /
3953 (new_itr + (q_vector->itr_val >> 2)),
3954 new_itr) :
3955 new_itr;
3956 /* Don't write the value here; it resets the adapter's
3957 * internal timer, and causes us to delay far longer than
3958 * we should between interrupts. Instead, we write the ITR
3959 * value at the beginning of the next interrupt so the timing
3960 * ends up being correct.
3961 */
3962 q_vector->itr_val = new_itr;
3963 q_vector->set_itr = 1;
3964 }
3965 }
3966
3967 #define IGB_TX_FLAGS_CSUM 0x00000001
3968 #define IGB_TX_FLAGS_VLAN 0x00000002
3969 #define IGB_TX_FLAGS_TSO 0x00000004
3970 #define IGB_TX_FLAGS_IPV4 0x00000008
3971 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3972 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3973 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3974
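/**
 * igb_tso_adv - set up an advanced context descriptor for TSO
 * @tx_ring: ring the context descriptor will be placed on
 * @skb: packet being segmented
 * @tx_flags: IGB_TX_FLAGS_* for this packet
 * @hdr_len: accumulates the header length (L2 offset + IP + TCP headers)
 *
 * Seeds the TCP checksum with the pseudo-header checksum and writes a
 * context descriptor carrying the VLAN/MACLEN/IPLEN and MSS/L4LEN
 * information the hardware needs to perform segmentation.  Returns a
 * negative error if a cloned header could not be expanded, true otherwise.
 **/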
3975 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3976 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3977 {
3978 struct e1000_adv_tx_context_desc *context_desc;
3979 unsigned int i;
3980 int err;
3981 struct igb_buffer *buffer_info;
3982 u32 info = 0, tu_cmd = 0;
3983 u32 mss_l4len_idx;
3984 u8 l4len;
3985
3986 if (skb_header_cloned(skb)) {
3987 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3988 if (err)
3989 return err;
3990 }
3991
3992 l4len = tcp_hdrlen(skb);
3993 *hdr_len += l4len;
3994
3995 if (skb->protocol == htons(ETH_P_IP)) {
3996 struct iphdr *iph = ip_hdr(skb);
3997 iph->tot_len = 0;
3998 iph->check = 0;
3999 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4000 iph->daddr, 0,
4001 IPPROTO_TCP,
4002 0);
4003 } else if (skb_is_gso_v6(skb)) {
4004 ipv6_hdr(skb)->payload_len = 0;
4005 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4006 &ipv6_hdr(skb)->daddr,
4007 0, IPPROTO_TCP, 0);
4008 }
4009
4010 i = tx_ring->next_to_use;
4011
4012 buffer_info = &tx_ring->buffer_info[i];
4013 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4014 /* VLAN MACLEN IPLEN */
4015 if (tx_flags & IGB_TX_FLAGS_VLAN)
4016 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4017 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4018 *hdr_len += skb_network_offset(skb);
4019 info |= skb_network_header_len(skb);
4020 *hdr_len += skb_network_header_len(skb);
4021 context_desc->vlan_macip_lens = cpu_to_le32(info);
4022
4023 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4024 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4025
4026 if (skb->protocol == htons(ETH_P_IP))
4027 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4028 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4029
4030 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4031
4032 /* MSS L4LEN IDX */
4033 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4034 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4035
4036 /* For 82575, context index must be unique per ring. */
4037 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4038 mss_l4len_idx |= tx_ring->reg_idx << 4;
4039
4040 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4041 context_desc->seqnum_seed = 0;
4042
4043 buffer_info->time_stamp = jiffies;
4044 buffer_info->next_to_watch = i;
4045 buffer_info->dma = 0;
4046 i++;
4047 if (i == tx_ring->count)
4048 i = 0;
4049
4050 tx_ring->next_to_use = i;
4051
4052 return true;
4053 }
4054
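/**
 * igb_tx_csum_adv - set up an advanced context descriptor for Tx checksum
 * @tx_ring: ring the context descriptor will be placed on
 * @skb: packet needing checksum offload and/or VLAN insertion
 * @tx_flags: IGB_TX_FLAGS_* for this packet
 *
 * Returns true if a context descriptor was written (checksum offload was
 * requested or a VLAN tag is present), false otherwise.
 **/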
4055 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4056 struct sk_buff *skb, u32 tx_flags)
4057 {
4058 struct e1000_adv_tx_context_desc *context_desc;
4059 struct device *dev = tx_ring->dev;
4060 struct igb_buffer *buffer_info;
4061 u32 info = 0, tu_cmd = 0;
4062 unsigned int i;
4063
4064 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4065 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4066 i = tx_ring->next_to_use;
4067 buffer_info = &tx_ring->buffer_info[i];
4068 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4069
4070 if (tx_flags & IGB_TX_FLAGS_VLAN)
4071 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4072
4073 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4074 if (skb->ip_summed == CHECKSUM_PARTIAL)
4075 info |= skb_network_header_len(skb);
4076
4077 context_desc->vlan_macip_lens = cpu_to_le32(info);
4078
4079 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4080
4081 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4082 __be16 protocol;
4083
4084 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4085 const struct vlan_ethhdr *vhdr =
4086 (const struct vlan_ethhdr*)skb->data;
4087
4088 protocol = vhdr->h_vlan_encapsulated_proto;
4089 } else {
4090 protocol = skb->protocol;
4091 }
4092
4093 switch (protocol) {
4094 case cpu_to_be16(ETH_P_IP):
4095 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4096 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4097 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4098 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4099 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4100 break;
4101 case cpu_to_be16(ETH_P_IPV6):
4102 /* XXX what about other V6 headers?? */
4103 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4104 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4105 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4106 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4107 break;
4108 default:
4109 if (unlikely(net_ratelimit()))
4110 dev_warn(dev,
4111 "partial checksum but proto=%x!\n",
4112 skb->protocol);
4113 break;
4114 }
4115 }
4116
4117 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4118 context_desc->seqnum_seed = 0;
4119 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4120 context_desc->mss_l4len_idx =
4121 cpu_to_le32(tx_ring->reg_idx << 4);
4122
4123 buffer_info->time_stamp = jiffies;
4124 buffer_info->next_to_watch = i;
4125 buffer_info->dma = 0;
4126
4127 i++;
4128 if (i == tx_ring->count)
4129 i = 0;
4130 tx_ring->next_to_use = i;
4131
4132 return true;
4133 }
4134 return false;
4135 }
4136
4137 #define IGB_MAX_TXD_PWR 16
4138 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4139
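/**
 * igb_tx_map_adv - map skb data and fragments for transmission
 * @tx_ring: ring the buffers are mapped for
 * @skb: packet to map
 * @first: index of the first descriptor used for this packet
 *
 * DMA-maps the linear part of the skb and each page fragment, recording
 * the mappings in the ring's buffer_info array.  Returns the number of
 * buffers mapped, or 0 if a DMA mapping error occurred (in which case any
 * partial mappings are unwound).
 **/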
4140 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4141 unsigned int first)
4142 {
4143 struct igb_buffer *buffer_info;
4144 struct device *dev = tx_ring->dev;
4145 unsigned int hlen = skb_headlen(skb);
4146 unsigned int count = 0, i;
4147 unsigned int f;
4148 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4149
4150 i = tx_ring->next_to_use;
4151
4152 buffer_info = &tx_ring->buffer_info[i];
4153 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4154 buffer_info->length = hlen;
4155 /* set time_stamp *before* dma to help avoid a possible race */
4156 buffer_info->time_stamp = jiffies;
4157 buffer_info->next_to_watch = i;
4158 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4159 DMA_TO_DEVICE);
4160 if (dma_mapping_error(dev, buffer_info->dma))
4161 goto dma_error;
4162
4163 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4164 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4165 unsigned int len = frag->size;
4166
4167 count++;
4168 i++;
4169 if (i == tx_ring->count)
4170 i = 0;
4171
4172 buffer_info = &tx_ring->buffer_info[i];
4173 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4174 buffer_info->length = len;
4175 buffer_info->time_stamp = jiffies;
4176 buffer_info->next_to_watch = i;
4177 buffer_info->mapped_as_page = true;
4178 buffer_info->dma = dma_map_page(dev,
4179 frag->page,
4180 frag->page_offset,
4181 len,
4182 DMA_TO_DEVICE);
4183 if (dma_mapping_error(dev, buffer_info->dma))
4184 goto dma_error;
4185
4186 }
4187
4188 tx_ring->buffer_info[i].skb = skb;
4189 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4190 /* multiply data chunks by size of headers */
4191 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4192 tx_ring->buffer_info[i].gso_segs = gso_segs;
4193 tx_ring->buffer_info[first].next_to_watch = i;
4194
4195 return ++count;
4196
4197 dma_error:
4198 dev_err(dev, "TX DMA map failed\n");
4199
4200 /* clear timestamp and dma mappings for failed buffer_info mapping */
4201 buffer_info->dma = 0;
4202 buffer_info->time_stamp = 0;
4203 buffer_info->length = 0;
4204 buffer_info->next_to_watch = 0;
4205 buffer_info->mapped_as_page = false;
4206
4207 /* clear timestamp and dma mappings for remaining portion of packet */
4208 while (count--) {
4209 if (i == 0)
4210 i = tx_ring->count;
4211 i--;
4212 buffer_info = &tx_ring->buffer_info[i];
4213 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4214 }
4215
4216 return 0;
4217 }
4218
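/**
 * igb_tx_queue_adv - write the mapped buffers as advanced Tx descriptors
 * @tx_ring: ring to place the descriptors on
 * @tx_flags: IGB_TX_FLAGS_* for this packet
 * @count: number of buffers mapped by igb_tx_map_adv
 * @paylen: total packet length
 * @hdr_len: header length reported by the TSO setup, if any
 *
 * Builds one data descriptor per mapped buffer, sets the final command
 * bits (IGB_ADVTXD_DCMD) on the last descriptor, and then bumps the ring
 * tail so the hardware starts fetching the new descriptors.
 **/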
4219 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4220 u32 tx_flags, int count, u32 paylen,
4221 u8 hdr_len)
4222 {
4223 union e1000_adv_tx_desc *tx_desc;
4224 struct igb_buffer *buffer_info;
4225 u32 olinfo_status = 0, cmd_type_len;
4226 unsigned int i = tx_ring->next_to_use;
4227
4228 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4229 E1000_ADVTXD_DCMD_DEXT);
4230
4231 if (tx_flags & IGB_TX_FLAGS_VLAN)
4232 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4233
4234 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4235 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4236
4237 if (tx_flags & IGB_TX_FLAGS_TSO) {
4238 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4239
4240 /* insert tcp checksum */
4241 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4242
4243 /* insert ip checksum */
4244 if (tx_flags & IGB_TX_FLAGS_IPV4)
4245 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4246
4247 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4248 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4249 }
4250
4251 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4252 (tx_flags & (IGB_TX_FLAGS_CSUM |
4253 IGB_TX_FLAGS_TSO |
4254 IGB_TX_FLAGS_VLAN)))
4255 olinfo_status |= tx_ring->reg_idx << 4;
4256
4257 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4258
4259 do {
4260 buffer_info = &tx_ring->buffer_info[i];
4261 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4262 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4263 tx_desc->read.cmd_type_len =
4264 cpu_to_le32(cmd_type_len | buffer_info->length);
4265 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4266 count--;
4267 i++;
4268 if (i == tx_ring->count)
4269 i = 0;
4270 } while (count > 0);
4271
4272 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4273 /* Force memory writes to complete before letting h/w
4274 * know there are new descriptors to fetch. (Only
4275 * applicable for weak-ordered memory model archs,
4276 * such as IA-64). */
4277 wmb();
4278
4279 tx_ring->next_to_use = i;
4280 writel(i, tx_ring->tail);
4281 /* we need this if more than one processor can write to our tail
4282 * at a time; it synchronizes IO on IA64/Altix systems */
4283 mmiowb();
4284 }
4285
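/**
 * __igb_maybe_stop_tx - stop the queue when the ring is nearly full
 * @tx_ring: ring to check
 * @size: number of descriptors the next transmit needs
 *
 * Stops the subqueue, then re-checks the free descriptor count in case
 * another CPU freed descriptors in the meantime.  Returns -EBUSY if the
 * queue stays stopped, 0 if it could be restarted.
 **/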
4286 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4287 {
4288 struct net_device *netdev = tx_ring->netdev;
4289
4290 netif_stop_subqueue(netdev, tx_ring->queue_index);
4291
4292 /* Herbert's original patch had:
4293 * smp_mb__after_netif_stop_queue();
4294 * but since that doesn't exist yet, just open code it. */
4295 smp_mb();
4296
4297 /* We need to check again in case another CPU has just
4298 * made room available. */
4299 if (igb_desc_unused(tx_ring) < size)
4300 return -EBUSY;
4301
4302 /* A reprieve! */
4303 netif_wake_subqueue(netdev, tx_ring->queue_index);
4304
4305 u64_stats_update_begin(&tx_ring->tx_syncp2);
4306 tx_ring->tx_stats.restart_queue2++;
4307 u64_stats_update_end(&tx_ring->tx_syncp2);
4308
4309 return 0;
4310 }
4311
4312 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4313 {
4314 if (igb_desc_unused(tx_ring) >= size)
4315 return 0;
4316 return __igb_maybe_stop_tx(tx_ring, size);
4317 }
4318
4319 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4320 struct igb_ring *tx_ring)
4321 {
4322 int tso = 0, count;
4323 u32 tx_flags = 0;
4324 u16 first;
4325 u8 hdr_len = 0;
4326
4327 /* need: 1 descriptor per page,
4328 * + 2 desc gap to keep tail from touching head,
4329 * + 1 desc for skb->data,
4330 * + 1 desc for context descriptor,
4331 * otherwise try next time */
4332 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4333 /* this is a hard error */
4334 return NETDEV_TX_BUSY;
4335 }
4336
4337 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4338 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4339 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4340 }
4341
4342 if (vlan_tx_tag_present(skb)) {
4343 tx_flags |= IGB_TX_FLAGS_VLAN;
4344 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4345 }
4346
4347 if (skb->protocol == htons(ETH_P_IP))
4348 tx_flags |= IGB_TX_FLAGS_IPV4;
4349
4350 first = tx_ring->next_to_use;
4351 if (skb_is_gso(skb)) {
4352 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4353
4354 if (tso < 0) {
4355 dev_kfree_skb_any(skb);
4356 return NETDEV_TX_OK;
4357 }
4358 }
4359
4360 if (tso)
4361 tx_flags |= IGB_TX_FLAGS_TSO;
4362 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4363 (skb->ip_summed == CHECKSUM_PARTIAL))
4364 tx_flags |= IGB_TX_FLAGS_CSUM;
4365
4366 /*
4367 * count reflects descriptors mapped; if it is 0 or less, a mapping
4368 * error has occurred and we need to rewind the descriptor queue
4369 */
4370 count = igb_tx_map_adv(tx_ring, skb, first);
4371 if (!count) {
4372 dev_kfree_skb_any(skb);
4373 tx_ring->buffer_info[first].time_stamp = 0;
4374 tx_ring->next_to_use = first;
4375 return NETDEV_TX_OK;
4376 }
4377
4378 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4379
4380 /* Make sure there is space in the ring for the next send. */
4381 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4382
4383 return NETDEV_TX_OK;
4384 }
4385
4386 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4387 struct net_device *netdev)
4388 {
4389 struct igb_adapter *adapter = netdev_priv(netdev);
4390 struct igb_ring *tx_ring;
4391 int r_idx = 0;
4392
4393 if (test_bit(__IGB_DOWN, &adapter->state)) {
4394 dev_kfree_skb_any(skb);
4395 return NETDEV_TX_OK;
4396 }
4397
4398 if (skb->len <= 0) {
4399 dev_kfree_skb_any(skb);
4400 return NETDEV_TX_OK;
4401 }
4402
4403 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4404 tx_ring = adapter->multi_tx_table[r_idx];
4405
4406 /* This goes back to the question of how to logically map a tx queue
4407 * to a flow. Right now, performance is impacted slightly negatively
4408 * if using multiple tx queues. If the stack breaks away from a
4409 * single qdisc implementation, we can look at this again. */
4410 return igb_xmit_frame_ring_adv(skb, tx_ring);
4411 }
4412
4413 /**
4414 * igb_tx_timeout - Respond to a Tx Hang
4415 * @netdev: network interface device structure
4416 **/
4417 static void igb_tx_timeout(struct net_device *netdev)
4418 {
4419 struct igb_adapter *adapter = netdev_priv(netdev);
4420 struct e1000_hw *hw = &adapter->hw;
4421
4422 /* Do the reset outside of interrupt context */
4423 adapter->tx_timeout_count++;
4424
4425 if (hw->mac.type == e1000_82580)
4426 hw->dev_spec._82575.global_device_reset = true;
4427
4428 schedule_work(&adapter->reset_task);
4429 wr32(E1000_EICS,
4430 (adapter->eims_enable_mask & ~adapter->eims_other));
4431 }
4432
4433 static void igb_reset_task(struct work_struct *work)
4434 {
4435 struct igb_adapter *adapter;
4436 adapter = container_of(work, struct igb_adapter, reset_task);
4437
4438 igb_dump(adapter);
4439 netdev_err(adapter->netdev, "Reset adapter\n");
4440 igb_reinit_locked(adapter);
4441 }
4442
4443 /**
4444 * igb_get_stats64 - Get System Network Statistics
4445 * @netdev: network interface device structure
4446 * @stats: rtnl_link_stats64 pointer
4447 *
4448 **/
4449 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4450 struct rtnl_link_stats64 *stats)
4451 {
4452 struct igb_adapter *adapter = netdev_priv(netdev);
4453
4454 spin_lock(&adapter->stats64_lock);
4455 igb_update_stats(adapter, &adapter->stats64);
4456 memcpy(stats, &adapter->stats64, sizeof(*stats));
4457 spin_unlock(&adapter->stats64_lock);
4458
4459 return stats;
4460 }
4461
4462 /**
4463 * igb_change_mtu - Change the Maximum Transfer Unit
4464 * @netdev: network interface device structure
4465 * @new_mtu: new value for maximum frame size
4466 *
4467 * Returns 0 on success, negative on failure
4468 **/
4469 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4470 {
4471 struct igb_adapter *adapter = netdev_priv(netdev);
4472 struct pci_dev *pdev = adapter->pdev;
4473 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4474 u32 rx_buffer_len, i;
4475
4476 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4477 dev_err(&pdev->dev, "Invalid MTU setting\n");
4478 return -EINVAL;
4479 }
4480
4481 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4482 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4483 return -EINVAL;
4484 }
4485
4486 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4487 msleep(1);
4488
4489 /* igb_down has a dependency on max_frame_size */
4490 adapter->max_frame_size = max_frame;
4491
4492 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4493 * means we reserve 2 more; this pushes us to allocate from the next
4494 * larger slab size.
4495 * i.e. RXBUFFER_2048 --> size-4096 slab
4496 */
4497
4498 if (adapter->hw.mac.type == e1000_82580)
4499 max_frame += IGB_TS_HDR_LEN;
4500
4501 if (max_frame <= IGB_RXBUFFER_1024)
4502 rx_buffer_len = IGB_RXBUFFER_1024;
4503 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4504 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4505 else
4506 rx_buffer_len = IGB_RXBUFFER_128;
4507
4508 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4509 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4510 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4511
4512 if ((adapter->hw.mac.type == e1000_82580) &&
4513 (rx_buffer_len == IGB_RXBUFFER_128))
4514 rx_buffer_len += IGB_RXBUFFER_64;
4515
4516 if (netif_running(netdev))
4517 igb_down(adapter);
4518
4519 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4520 netdev->mtu, new_mtu);
4521 netdev->mtu = new_mtu;
4522
4523 for (i = 0; i < adapter->num_rx_queues; i++)
4524 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4525
4526 if (netif_running(netdev))
4527 igb_up(adapter);
4528 else
4529 igb_reset(adapter);
4530
4531 clear_bit(__IGB_RESETTING, &adapter->state);
4532
4533 return 0;
4534 }
4535
4536 /**
4537 * igb_update_stats - Update the board statistics counters
4538 * @adapter: board private structure
4539 **/
4540
4541 void igb_update_stats(struct igb_adapter *adapter,
4542 struct rtnl_link_stats64 *net_stats)
4543 {
4544 struct e1000_hw *hw = &adapter->hw;
4545 struct pci_dev *pdev = adapter->pdev;
4546 u32 reg, mpc;
4547 u16 phy_tmp;
4548 int i;
4549 u64 bytes, packets;
4550 unsigned int start;
4551 u64 _bytes, _packets;
4552
4553 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4554
4555 /*
4556 * Prevent stats update while adapter is being reset, or if the pci
4557 * connection is down.
4558 */
4559 if (adapter->link_speed == 0)
4560 return;
4561 if (pci_channel_offline(pdev))
4562 return;
4563
4564 bytes = 0;
4565 packets = 0;
4566 for (i = 0; i < adapter->num_rx_queues; i++) {
4567 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4568 struct igb_ring *ring = adapter->rx_ring[i];
4569
4570 ring->rx_stats.drops += rqdpc_tmp;
4571 net_stats->rx_fifo_errors += rqdpc_tmp;
4572
4573 do {
4574 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4575 _bytes = ring->rx_stats.bytes;
4576 _packets = ring->rx_stats.packets;
4577 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4578 bytes += _bytes;
4579 packets += _packets;
4580 }
4581
4582 net_stats->rx_bytes = bytes;
4583 net_stats->rx_packets = packets;
4584
4585 bytes = 0;
4586 packets = 0;
4587 for (i = 0; i < adapter->num_tx_queues; i++) {
4588 struct igb_ring *ring = adapter->tx_ring[i];
4589 do {
4590 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4591 _bytes = ring->tx_stats.bytes;
4592 _packets = ring->tx_stats.packets;
4593 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4594 bytes += _bytes;
4595 packets += _packets;
4596 }
4597 net_stats->tx_bytes = bytes;
4598 net_stats->tx_packets = packets;
4599
4600 /* read stats registers */
4601 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4602 adapter->stats.gprc += rd32(E1000_GPRC);
4603 adapter->stats.gorc += rd32(E1000_GORCL);
4604 rd32(E1000_GORCH); /* clear GORCL */
4605 adapter->stats.bprc += rd32(E1000_BPRC);
4606 adapter->stats.mprc += rd32(E1000_MPRC);
4607 adapter->stats.roc += rd32(E1000_ROC);
4608
4609 adapter->stats.prc64 += rd32(E1000_PRC64);
4610 adapter->stats.prc127 += rd32(E1000_PRC127);
4611 adapter->stats.prc255 += rd32(E1000_PRC255);
4612 adapter->stats.prc511 += rd32(E1000_PRC511);
4613 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4614 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4615 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4616 adapter->stats.sec += rd32(E1000_SEC);
4617
4618 mpc = rd32(E1000_MPC);
4619 adapter->stats.mpc += mpc;
4620 net_stats->rx_fifo_errors += mpc;
4621 adapter->stats.scc += rd32(E1000_SCC);
4622 adapter->stats.ecol += rd32(E1000_ECOL);
4623 adapter->stats.mcc += rd32(E1000_MCC);
4624 adapter->stats.latecol += rd32(E1000_LATECOL);
4625 adapter->stats.dc += rd32(E1000_DC);
4626 adapter->stats.rlec += rd32(E1000_RLEC);
4627 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4628 adapter->stats.xontxc += rd32(E1000_XONTXC);
4629 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4630 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4631 adapter->stats.fcruc += rd32(E1000_FCRUC);
4632 adapter->stats.gptc += rd32(E1000_GPTC);
4633 adapter->stats.gotc += rd32(E1000_GOTCL);
4634 rd32(E1000_GOTCH); /* clear GOTCL */
4635 adapter->stats.rnbc += rd32(E1000_RNBC);
4636 adapter->stats.ruc += rd32(E1000_RUC);
4637 adapter->stats.rfc += rd32(E1000_RFC);
4638 adapter->stats.rjc += rd32(E1000_RJC);
4639 adapter->stats.tor += rd32(E1000_TORH);
4640 adapter->stats.tot += rd32(E1000_TOTH);
4641 adapter->stats.tpr += rd32(E1000_TPR);
4642
4643 adapter->stats.ptc64 += rd32(E1000_PTC64);
4644 adapter->stats.ptc127 += rd32(E1000_PTC127);
4645 adapter->stats.ptc255 += rd32(E1000_PTC255);
4646 adapter->stats.ptc511 += rd32(E1000_PTC511);
4647 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4648 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4649
4650 adapter->stats.mptc += rd32(E1000_MPTC);
4651 adapter->stats.bptc += rd32(E1000_BPTC);
4652
4653 adapter->stats.tpt += rd32(E1000_TPT);
4654 adapter->stats.colc += rd32(E1000_COLC);
4655
4656 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4657 /* read internal phy specific stats */
4658 reg = rd32(E1000_CTRL_EXT);
4659 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4660 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4661 adapter->stats.tncrs += rd32(E1000_TNCRS);
4662 }
4663
4664 adapter->stats.tsctc += rd32(E1000_TSCTC);
4665 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4666
4667 adapter->stats.iac += rd32(E1000_IAC);
4668 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4669 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4670 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4671 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4672 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4673 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4674 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4675 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4676
4677 /* Fill out the OS statistics structure */
4678 net_stats->multicast = adapter->stats.mprc;
4679 net_stats->collisions = adapter->stats.colc;
4680
4681 /* Rx Errors */
4682
4683 /* RLEC on some newer hardware can be incorrect so build
4684 * our own version based on RUC and ROC */
4685 net_stats->rx_errors = adapter->stats.rxerrc +
4686 adapter->stats.crcerrs + adapter->stats.algnerrc +
4687 adapter->stats.ruc + adapter->stats.roc +
4688 adapter->stats.cexterr;
4689 net_stats->rx_length_errors = adapter->stats.ruc +
4690 adapter->stats.roc;
4691 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4692 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4693 net_stats->rx_missed_errors = adapter->stats.mpc;
4694
4695 /* Tx Errors */
4696 net_stats->tx_errors = adapter->stats.ecol +
4697 adapter->stats.latecol;
4698 net_stats->tx_aborted_errors = adapter->stats.ecol;
4699 net_stats->tx_window_errors = adapter->stats.latecol;
4700 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4701
4702 /* Tx Dropped needs to be maintained elsewhere */
4703
4704 /* Phy Stats */
4705 if (hw->phy.media_type == e1000_media_type_copper) {
4706 if ((adapter->link_speed == SPEED_1000) &&
4707 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4708 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4709 adapter->phy_stats.idle_errors += phy_tmp;
4710 }
4711 }
4712
4713 /* Management Stats */
4714 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4715 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4716 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4717
4718 /* OS2BMC Stats */
4719 reg = rd32(E1000_MANC);
4720 if (reg & E1000_MANC_EN_BMC2OS) {
4721 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4722 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4723 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4724 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4725 }
4726 }
4727
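/**
 * igb_msix_other - handle the "other" causes MSI-X vector
 * @irq: interrupt number
 * @data: pointer to the adapter
 *
 * Services non-queue interrupt causes: device reset requests, DMA
 * out-of-sync (and possible VF spoof) events, VF mailbox messages, and
 * link status changes.
 **/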
4728 static irqreturn_t igb_msix_other(int irq, void *data)
4729 {
4730 struct igb_adapter *adapter = data;
4731 struct e1000_hw *hw = &adapter->hw;
4732 u32 icr = rd32(E1000_ICR);
4733 /* reading ICR causes bit 31 of EICR to be cleared */
4734
4735 if (icr & E1000_ICR_DRSTA)
4736 schedule_work(&adapter->reset_task);
4737
4738 if (icr & E1000_ICR_DOUTSYNC) {
4739 /* HW is reporting DMA is out of sync */
4740 adapter->stats.doosync++;
4741 /* The DMA Out of Sync is also an indication of a spoof event
4742 * in IOV mode. Check the Wrong VM Behavior register to
4743 * see if it is really a spoof event. */
4744 igb_check_wvbr(adapter);
4745 }
4746
4747 /* Check for a mailbox event */
4748 if (icr & E1000_ICR_VMMB)
4749 igb_msg_task(adapter);
4750
4751 if (icr & E1000_ICR_LSC) {
4752 hw->mac.get_link_status = 1;
4753 /* guard against interrupt when we're going down */
4754 if (!test_bit(__IGB_DOWN, &adapter->state))
4755 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4756 }
4757
4758 if (adapter->vfs_allocated_count)
4759 wr32(E1000_IMS, E1000_IMS_LSC |
4760 E1000_IMS_VMMB |
4761 E1000_IMS_DOUTSYNC);
4762 else
4763 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4764 wr32(E1000_EIMS, adapter->eims_other);
4765
4766 return IRQ_HANDLED;
4767 }
4768
4769 static void igb_write_itr(struct igb_q_vector *q_vector)
4770 {
4771 struct igb_adapter *adapter = q_vector->adapter;
4772 u32 itr_val = q_vector->itr_val & 0x7FFC;
4773
4774 if (!q_vector->set_itr)
4775 return;
4776
4777 if (!itr_val)
4778 itr_val = 0x4;
4779
4780 if (adapter->hw.mac.type == e1000_82575)
4781 itr_val |= itr_val << 16;
4782 else
4783 itr_val |= 0x8000000;
4784
4785 writel(itr_val, q_vector->itr_register);
4786 q_vector->set_itr = 0;
4787 }
4788
4789 static irqreturn_t igb_msix_ring(int irq, void *data)
4790 {
4791 struct igb_q_vector *q_vector = data;
4792
4793 /* Write the ITR value calculated from the previous interrupt. */
4794 igb_write_itr(q_vector);
4795
4796 napi_schedule(&q_vector->napi);
4797
4798 return IRQ_HANDLED;
4799 }
4800
4801 #ifdef CONFIG_IGB_DCA
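/**
 * igb_update_dca - retarget DCA to the CPU currently servicing a vector
 * @q_vector: vector whose Tx/Rx rings should be retargeted
 *
 * If the vector has moved to a different CPU, rewrites the ring's
 * DCA_TXCTRL/DCA_RXCTRL registers with the DCA tag for that CPU so that
 * descriptor (and Rx header/data) writes are steered to its cache.
 **/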
4802 static void igb_update_dca(struct igb_q_vector *q_vector)
4803 {
4804 struct igb_adapter *adapter = q_vector->adapter;
4805 struct e1000_hw *hw = &adapter->hw;
4806 int cpu = get_cpu();
4807
4808 if (q_vector->cpu == cpu)
4809 goto out_no_update;
4810
4811 if (q_vector->tx_ring) {
4812 int q = q_vector->tx_ring->reg_idx;
4813 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4814 if (hw->mac.type == e1000_82575) {
4815 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4816 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4817 } else {
4818 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4819 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4820 E1000_DCA_TXCTRL_CPUID_SHIFT;
4821 }
4822 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4823 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4824 }
4825 if (q_vector->rx_ring) {
4826 int q = q_vector->rx_ring->reg_idx;
4827 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4828 if (hw->mac.type == e1000_82575) {
4829 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4830 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4831 } else {
4832 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4833 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4834 E1000_DCA_RXCTRL_CPUID_SHIFT;
4835 }
4836 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4837 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4838 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4839 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4840 }
4841 q_vector->cpu = cpu;
4842 out_no_update:
4843 put_cpu();
4844 }
4845
4846 static void igb_setup_dca(struct igb_adapter *adapter)
4847 {
4848 struct e1000_hw *hw = &adapter->hw;
4849 int i;
4850
4851 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4852 return;
4853
4854 /* Always use CB2 mode, difference is masked in the CB driver. */
4855 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4856
4857 for (i = 0; i < adapter->num_q_vectors; i++) {
4858 adapter->q_vector[i]->cpu = -1;
4859 igb_update_dca(adapter->q_vector[i]);
4860 }
4861 }
4862
4863 static int __igb_notify_dca(struct device *dev, void *data)
4864 {
4865 struct net_device *netdev = dev_get_drvdata(dev);
4866 struct igb_adapter *adapter = netdev_priv(netdev);
4867 struct pci_dev *pdev = adapter->pdev;
4868 struct e1000_hw *hw = &adapter->hw;
4869 unsigned long event = *(unsigned long *)data;
4870
4871 switch (event) {
4872 case DCA_PROVIDER_ADD:
4873 /* if already enabled, don't do it again */
4874 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4875 break;
4876 if (dca_add_requester(dev) == 0) {
4877 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4878 dev_info(&pdev->dev, "DCA enabled\n");
4879 igb_setup_dca(adapter);
4880 break;
4881 }
4882 /* Fall Through since DCA is disabled. */
4883 case DCA_PROVIDER_REMOVE:
4884 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4885 /* without this a class_device is left
4886 * hanging around in the sysfs model */
4887 dca_remove_requester(dev);
4888 dev_info(&pdev->dev, "DCA disabled\n");
4889 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4890 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4891 }
4892 break;
4893 }
4894
4895 return 0;
4896 }
4897
4898 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4899 void *p)
4900 {
4901 int ret_val;
4902
4903 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4904 __igb_notify_dca);
4905
4906 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4907 }
4908 #endif /* CONFIG_IGB_DCA */
4909
4910 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4911 {
4912 struct e1000_hw *hw = &adapter->hw;
4913 u32 ping;
4914 int i;
4915
4916 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4917 ping = E1000_PF_CONTROL_MSG;
4918 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4919 ping |= E1000_VT_MSGTYPE_CTS;
4920 igb_write_mbx(hw, &ping, 1, i);
4921 }
4922 }
4923
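/**
 * igb_set_vf_promisc - handle a VF request to change promiscuous settings
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF index the request came from
 *
 * Updates the VMOLR register for the VF.  Multicast promiscuous mode is
 * honored; when it is cleared, the VF's stored multicast hashes are
 * written back to the MTA.  Returns -EINVAL if unsupported flags remain.
 **/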
4924 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4925 {
4926 struct e1000_hw *hw = &adapter->hw;
4927 u32 vmolr = rd32(E1000_VMOLR(vf));
4928 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4929
4930 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4931 IGB_VF_FLAG_MULTI_PROMISC);
4932 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4933
4934 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4935 vmolr |= E1000_VMOLR_MPME;
4936 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4937 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4938 } else {
4939 /*
4940 * if we have hashes and we are clearing a multicast promisc
4941 * flag we need to write the hashes to the MTA as this step
4942 * was previously skipped
4943 */
4944 if (vf_data->num_vf_mc_hashes > 30) {
4945 vmolr |= E1000_VMOLR_MPME;
4946 } else if (vf_data->num_vf_mc_hashes) {
4947 int j;
4948 vmolr |= E1000_VMOLR_ROMPE;
4949 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4950 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4951 }
4952 }
4953
4954 wr32(E1000_VMOLR(vf), vmolr);
4955
4956 /* there are flags left unprocessed, likely not supported */
4957 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4958 return -EINVAL;
4959
4960 return 0;
4961
4962 }
4963
4964 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4965 u32 *msgbuf, u32 vf)
4966 {
4967 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4968 u16 *hash_list = (u16 *)&msgbuf[1];
4969 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4970 int i;
4971
4972 /* salt away the number of multicast addresses assigned
4973 * to this VF for later use, to restore it when the PF multicast
4974 * list changes
4975 */
4976 vf_data->num_vf_mc_hashes = n;
4977
4978 /* only up to 30 hash values supported */
4979 if (n > 30)
4980 n = 30;
4981
4982 /* store the hashes for later use */
4983 for (i = 0; i < n; i++)
4984 vf_data->vf_mc_hashes[i] = hash_list[i];
4985
4986 /* Flush and reset the mta with the new values */
4987 igb_set_rx_mode(adapter->netdev);
4988
4989 return 0;
4990 }
4991
4992 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4993 {
4994 struct e1000_hw *hw = &adapter->hw;
4995 struct vf_data_storage *vf_data;
4996 int i, j;
4997
4998 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4999 u32 vmolr = rd32(E1000_VMOLR(i));
5000 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5001
5002 vf_data = &adapter->vf_data[i];
5003
5004 if ((vf_data->num_vf_mc_hashes > 30) ||
5005 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5006 vmolr |= E1000_VMOLR_MPME;
5007 } else if (vf_data->num_vf_mc_hashes) {
5008 vmolr |= E1000_VMOLR_ROMPE;
5009 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5010 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5011 }
5012 wr32(E1000_VMOLR(i), vmolr);
5013 }
5014 }
5015
5016 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5017 {
5018 struct e1000_hw *hw = &adapter->hw;
5019 u32 pool_mask, reg, vid;
5020 int i;
5021
5022 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5023
5024 /* Find the vlan filter for this id */
5025 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5026 reg = rd32(E1000_VLVF(i));
5027
5028 /* remove the vf from the pool */
5029 reg &= ~pool_mask;
5030
5031 /* if pool is empty then remove entry from vfta */
5032 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5033 (reg & E1000_VLVF_VLANID_ENABLE)) {
5034 reg = 0;
5035 vid = reg & E1000_VLVF_VLANID_MASK;
5036 igb_vfta_set(hw, vid, false);
5037 }
5038
5039 wr32(E1000_VLVF(i), reg);
5040 }
5041
5042 adapter->vf_data[vf].vlans_enabled = 0;
5043 }
5044
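/**
 * igb_vlvf_set - add or remove a VF from a VLVF VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the VF to the filter, false to remove it
 * @vf: VF (or PF pool) index to update
 *
 * Looks up the VLVF entry for @vid, updates its pool select bits and the
 * VFTA as needed, and adjusts the VF's RLPML to leave room for the VLAN
 * tag.  Only meaningful on 82576 and newer hardware with VFs allocated.
 **/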
5045 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5046 {
5047 struct e1000_hw *hw = &adapter->hw;
5048 u32 reg, i;
5049
5050 /* The vlvf table only exists on 82576 hardware and newer */
5051 if (hw->mac.type < e1000_82576)
5052 return -1;
5053
5054 /* we only need to do this if VMDq is enabled */
5055 if (!adapter->vfs_allocated_count)
5056 return -1;
5057
5058 /* Find the vlan filter for this id */
5059 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5060 reg = rd32(E1000_VLVF(i));
5061 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5062 vid == (reg & E1000_VLVF_VLANID_MASK))
5063 break;
5064 }
5065
5066 if (add) {
5067 if (i == E1000_VLVF_ARRAY_SIZE) {
5068 /* Did not find a matching VLAN ID entry that was
5069 * enabled. Search for a free filter entry, i.e.
5070 * one without the enable bit set
5071 */
5072 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5073 reg = rd32(E1000_VLVF(i));
5074 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5075 break;
5076 }
5077 }
5078 if (i < E1000_VLVF_ARRAY_SIZE) {
5079 /* Found an enabled/available entry */
5080 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5081
5082 /* if !enabled we need to set this up in vfta */
5083 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5084 /* add VID to filter table */
5085 igb_vfta_set(hw, vid, true);
5086 reg |= E1000_VLVF_VLANID_ENABLE;
5087 }
5088 reg &= ~E1000_VLVF_VLANID_MASK;
5089 reg |= vid;
5090 wr32(E1000_VLVF(i), reg);
5091
5092 /* do not modify RLPML for PF devices */
5093 if (vf >= adapter->vfs_allocated_count)
5094 return 0;
5095
5096 if (!adapter->vf_data[vf].vlans_enabled) {
5097 u32 size;
5098 reg = rd32(E1000_VMOLR(vf));
5099 size = reg & E1000_VMOLR_RLPML_MASK;
5100 size += 4;
5101 reg &= ~E1000_VMOLR_RLPML_MASK;
5102 reg |= size;
5103 wr32(E1000_VMOLR(vf), reg);
5104 }
5105
5106 adapter->vf_data[vf].vlans_enabled++;
5107 return 0;
5108 }
5109 } else {
5110 if (i < E1000_VLVF_ARRAY_SIZE) {
5111 /* remove vf from the pool */
5112 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5113 /* if pool is empty then remove entry from vfta */
5114 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5115 reg = 0;
5116 igb_vfta_set(hw, vid, false);
5117 }
5118 wr32(E1000_VLVF(i), reg);
5119
5120 /* do not modify RLPML for PF devices */
5121 if (vf >= adapter->vfs_allocated_count)
5122 return 0;
5123
5124 adapter->vf_data[vf].vlans_enabled--;
5125 if (!adapter->vf_data[vf].vlans_enabled) {
5126 u32 size;
5127 reg = rd32(E1000_VMOLR(vf));
5128 size = reg & E1000_VMOLR_RLPML_MASK;
5129 size -= 4;
5130 reg &= ~E1000_VMOLR_RLPML_MASK;
5131 reg |= size;
5132 wr32(E1000_VMOLR(vf), reg);
5133 }
5134 }
5135 }
5136 return 0;
5137 }
5138
5139 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5140 {
5141 struct e1000_hw *hw = &adapter->hw;
5142
5143 if (vid)
5144 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5145 else
5146 wr32(E1000_VMVIR(vf), 0);
5147 }
5148
5149 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5150 int vf, u16 vlan, u8 qos)
5151 {
5152 int err = 0;
5153 struct igb_adapter *adapter = netdev_priv(netdev);
5154
5155 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5156 return -EINVAL;
5157 if (vlan || qos) {
5158 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5159 if (err)
5160 goto out;
5161 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5162 igb_set_vmolr(adapter, vf, !vlan);
5163 adapter->vf_data[vf].pf_vlan = vlan;
5164 adapter->vf_data[vf].pf_qos = qos;
5165 dev_info(&adapter->pdev->dev,
5166 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5167 if (test_bit(__IGB_DOWN, &adapter->state)) {
5168 dev_warn(&adapter->pdev->dev,
5169 "The VF VLAN has been set,"
5170 " but the PF device is not up.\n");
5171 dev_warn(&adapter->pdev->dev,
5172 "Bring the PF device up before"
5173 " attempting to use the VF device.\n");
5174 }
5175 } else {
5176 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5177 false, vf);
5178 igb_set_vmvir(adapter, vlan, vf);
5179 igb_set_vmolr(adapter, vf, true);
5180 adapter->vf_data[vf].pf_vlan = 0;
5181 adapter->vf_data[vf].pf_qos = 0;
5182 }
5183 out:
5184 return err;
5185 }
5186
5187 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5188 {
5189 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5190 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5191
5192 return igb_vlvf_set(adapter, vid, add, vf);
5193 }
5194
5195 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5196 {
5197 /* clear flags - except flag that indicates PF has set the MAC */
5198 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5199 adapter->vf_data[vf].last_nack = jiffies;
5200
5201 /* reset offloads to defaults */
5202 igb_set_vmolr(adapter, vf, true);
5203
5204 /* reset vlans for device */
5205 igb_clear_vf_vfta(adapter, vf);
5206 if (adapter->vf_data[vf].pf_vlan)
5207 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5208 adapter->vf_data[vf].pf_vlan,
5209 adapter->vf_data[vf].pf_qos);
5210 else
5211 igb_clear_vf_vfta(adapter, vf);
5212
5213 /* reset multicast table array for vf */
5214 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5215
5216 /* Flush and reset the mta with the new values */
5217 igb_set_rx_mode(adapter->netdev);
5218 }
5219
5220 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5221 {
5222 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5223
5224 /* generate a new mac address as we were hotplug removed/added */
5225 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5226 random_ether_addr(vf_mac);
5227
5228 /* process remaining reset events */
5229 igb_vf_reset(adapter, vf);
5230 }
5231
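/**
 * igb_vf_reset_msg - handle a reset request from a VF
 * @adapter: board private structure
 * @vf: VF index that requested the reset
 *
 * Performs the common VF reset, programs the VF MAC address into a RAR
 * entry, enables Tx/Rx for the VF, and replies to the mailbox with an ACK
 * carrying the VF MAC address.
 **/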
5232 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5233 {
5234 struct e1000_hw *hw = &adapter->hw;
5235 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5236 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5237 u32 reg, msgbuf[3];
5238 u8 *addr = (u8 *)(&msgbuf[1]);
5239
5240 /* process all the same items cleared in a function level reset */
5241 igb_vf_reset(adapter, vf);
5242
5243 /* set vf mac address */
5244 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5245
5246 /* enable transmit and receive for vf */
5247 reg = rd32(E1000_VFTE);
5248 wr32(E1000_VFTE, reg | (1 << vf));
5249 reg = rd32(E1000_VFRE);
5250 wr32(E1000_VFRE, reg | (1 << vf));
5251
5252 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5253
5254 /* reply to reset with ack and vf mac address */
5255 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5256 memcpy(addr, vf_mac, 6);
5257 igb_write_mbx(hw, msgbuf, 3, vf);
5258 }
5259
5260 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5261 {
5262 /*
5263 * The VF MAC Address is stored in a packed array of bytes
5264 * starting at the second 32 bit word of the msg array
5265 */
5266 unsigned char *addr = (char *)&msg[1];
5267 int err = -1;
5268
5269 if (is_valid_ether_addr(addr))
5270 err = igb_set_vf_mac(adapter, vf, addr);
5271
5272 return err;
5273 }
5274
5275 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5276 {
5277 struct e1000_hw *hw = &adapter->hw;
5278 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5279 u32 msg = E1000_VT_MSGTYPE_NACK;
5280
5281 /* if device isn't clear to send it shouldn't be reading either */
5282 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5283 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5284 igb_write_mbx(hw, &msg, 1, vf);
5285 vf_data->last_nack = jiffies;
5286 }
5287 }
5288
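/**
 * igb_rcv_msg_from_vf - read and dispatch a mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index the message came from
 *
 * Reads the mailbox, handles reset requests, and dispatches MAC,
 * promiscuous, multicast, LPE and VLAN configuration requests.  The VF
 * is ACKed or NACKed depending on the outcome.
 **/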
5289 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5290 {
5291 struct pci_dev *pdev = adapter->pdev;
5292 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5293 struct e1000_hw *hw = &adapter->hw;
5294 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5295 s32 retval;
5296
5297 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5298
5299 if (retval) {
5300 /* if receive failed revoke VF CTS status and restart init */
5301 dev_err(&pdev->dev, "Error receiving message from VF\n");
5302 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5303 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5304 return;
5305 goto out;
5306 }
5307
5308 /* this is a message we already processed, do nothing */
5309 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5310 return;
5311
5312 /*
5313 * until the vf completes a reset it should not be
5314 * allowed to start any configuration.
5315 */
5316
5317 if (msgbuf[0] == E1000_VF_RESET) {
5318 igb_vf_reset_msg(adapter, vf);
5319 return;
5320 }
5321
5322 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5323 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5324 return;
5325 retval = -1;
5326 goto out;
5327 }
5328
5329 switch ((msgbuf[0] & 0xFFFF)) {
5330 case E1000_VF_SET_MAC_ADDR:
5331 retval = -EINVAL;
5332 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5333 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5334 else
5335 dev_warn(&pdev->dev,
5336 "VF %d attempted to override administratively "
5337 "set MAC address\nReload the VF driver to "
5338 "resume operations\n", vf);
5339 break;
5340 case E1000_VF_SET_PROMISC:
5341 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5342 break;
5343 case E1000_VF_SET_MULTICAST:
5344 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5345 break;
5346 case E1000_VF_SET_LPE:
5347 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5348 break;
5349 case E1000_VF_SET_VLAN:
5350 retval = -1;
5351 if (vf_data->pf_vlan)
5352 dev_warn(&pdev->dev,
5353 "VF %d attempted to override administratively "
5354 "set VLAN tag\nReload the VF driver to "
5355 "resume operations\n", vf);
5356 else
5357 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5358 break;
5359 default:
5360 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5361 retval = -1;
5362 break;
5363 }
5364
5365 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5366 out:
5367 /* notify the VF of the results of what it sent us */
5368 if (retval)
5369 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5370 else
5371 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5372
5373 igb_write_mbx(hw, msgbuf, 1, vf);
5374 }
5375
5376 static void igb_msg_task(struct igb_adapter *adapter)
5377 {
5378 struct e1000_hw *hw = &adapter->hw;
5379 u32 vf;
5380
5381 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5382 /* process any reset requests */
5383 if (!igb_check_for_rst(hw, vf))
5384 igb_vf_reset_event(adapter, vf);
5385
5386 /* process any messages pending */
5387 if (!igb_check_for_msg(hw, vf))
5388 igb_rcv_msg_from_vf(adapter, vf);
5389
5390 /* process any acks */
5391 if (!igb_check_for_ack(hw, vf))
5392 igb_rcv_ack_from_vf(adapter, vf);
5393 }
5394 }
5395
5396 /**
5397 * igb_set_uta - Set unicast filter table address
5398 * @adapter: board private structure
5399 *
5400 * The unicast table address is a register array of 32-bit registers.
5401 * The table is meant to be used in a way similar to how the MTA is used;
5402 * however, due to certain limitations in the hardware, it is necessary to
5403 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5404 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5405 **/
5406 static void igb_set_uta(struct igb_adapter *adapter)
5407 {
5408 struct e1000_hw *hw = &adapter->hw;
5409 int i;
5410
5411 /* The UTA table only exists on 82576 hardware and newer */
5412 if (hw->mac.type < e1000_82576)
5413 return;
5414
5415 /* we only need to do this if VMDq is enabled */
5416 if (!adapter->vfs_allocated_count)
5417 return;
5418
5419 for (i = 0; i < hw->mac.uta_reg_count; i++)
5420 array_wr32(E1000_UTA, i, ~0);
5421 }
5422
5423 /**
5424 * igb_intr_msi - Interrupt Handler
5425 * @irq: interrupt number
5426 * @data: pointer to a network interface device structure
5427 **/
5428 static irqreturn_t igb_intr_msi(int irq, void *data)
5429 {
5430 struct igb_adapter *adapter = data;
5431 struct igb_q_vector *q_vector = adapter->q_vector[0];
5432 struct e1000_hw *hw = &adapter->hw;
5433 /* read ICR disables interrupts using IAM */
5434 u32 icr = rd32(E1000_ICR);
5435
5436 igb_write_itr(q_vector);
5437
5438 if (icr & E1000_ICR_DRSTA)
5439 schedule_work(&adapter->reset_task);
5440
5441 if (icr & E1000_ICR_DOUTSYNC) {
5442 /* HW is reporting DMA is out of sync */
5443 adapter->stats.doosync++;
5444 }
5445
5446 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5447 hw->mac.get_link_status = 1;
5448 if (!test_bit(__IGB_DOWN, &adapter->state))
5449 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5450 }
5451
5452 napi_schedule(&q_vector->napi);
5453
5454 return IRQ_HANDLED;
5455 }
5456
5457 /**
5458 * igb_intr - Legacy Interrupt Handler
5459 * @irq: interrupt number
5460 * @data: pointer to a network interface device structure
5461 **/
5462 static irqreturn_t igb_intr(int irq, void *data)
5463 {
5464 struct igb_adapter *adapter = data;
5465 struct igb_q_vector *q_vector = adapter->q_vector[0];
5466 struct e1000_hw *hw = &adapter->hw;
5467 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5468 * need for the IMC write */
5469 u32 icr = rd32(E1000_ICR);
5470 if (!icr)
5471 return IRQ_NONE; /* Not our interrupt */
5472
5473 igb_write_itr(q_vector);
5474
5475 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5476 * not set, then the adapter didn't send an interrupt */
5477 if (!(icr & E1000_ICR_INT_ASSERTED))
5478 return IRQ_NONE;
5479
5480 if (icr & E1000_ICR_DRSTA)
5481 schedule_work(&adapter->reset_task);
5482
5483 if (icr & E1000_ICR_DOUTSYNC) {
5484 /* HW is reporting DMA is out of sync */
5485 adapter->stats.doosync++;
5486 }
5487
5488 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5489 hw->mac.get_link_status = 1;
5490 /* guard against interrupt when we're going down */
5491 if (!test_bit(__IGB_DOWN, &adapter->state))
5492 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5493 }
5494
5495 napi_schedule(&q_vector->napi);
5496
5497 return IRQ_HANDLED;
5498 }
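/*
 * Contrast with igb_intr_msi() above: the legacy INTx line may be shared
 * with other devices, so this handler must bail out with IRQ_NONE both
 * when ICR reads back zero and when INT_ASSERTED is clear.  MSI interrupts
 * are exclusive to the device, which is why the MSI handler skips those
 * checks and always returns IRQ_HANDLED.
 */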
5499
5500 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5501 {
5502 struct igb_adapter *adapter = q_vector->adapter;
5503 struct e1000_hw *hw = &adapter->hw;
5504
5505 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5506 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5507 if (!adapter->msix_entries)
5508 igb_set_itr(adapter);
5509 else
5510 igb_update_ring_itr(q_vector);
5511 }
5512
5513 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5514 if (adapter->msix_entries)
5515 wr32(E1000_EIMS, q_vector->eims_value);
5516 else
5517 igb_irq_enable(adapter);
5518 }
5519 }
5520
5521 /**
5522 * igb_poll - NAPI Rx polling callback
5523 * @napi: napi polling structure
5524 * @budget: count of how many packets we should handle
5525 **/
5526 static int igb_poll(struct napi_struct *napi, int budget)
5527 {
5528 struct igb_q_vector *q_vector = container_of(napi,
5529 struct igb_q_vector,
5530 napi);
5531 int tx_clean_complete = 1, work_done = 0;
5532
5533 #ifdef CONFIG_IGB_DCA
5534 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5535 igb_update_dca(q_vector);
5536 #endif
5537 if (q_vector->tx_ring)
5538 tx_clean_complete = igb_clean_tx_irq(q_vector);
5539
5540 if (q_vector->rx_ring)
5541 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5542
5543 if (!tx_clean_complete)
5544 work_done = budget;
5545
5546 /* If not enough Rx work done, exit the polling mode */
5547 if (work_done < budget) {
5548 napi_complete(napi);
5549 igb_ring_irq_enable(q_vector);
5550 }
5551
5552 return work_done;
5553 }
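/*
 * NAPI budget note: returning a value smaller than 'budget' means the
 * queues were drained, so napi_complete() was called and the vector's
 * interrupt re-enabled via igb_ring_irq_enable().  When Tx cleanup is not
 * complete, work_done is forced to 'budget' above, which keeps this vector
 * in polling mode and makes the core call igb_poll() again.
 */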
5554
5555 /**
5556 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5557 * @adapter: board private structure
5558 * @shhwtstamps: timestamp structure to update
5559 * @regval: unsigned 64bit system time value.
5560 *
5561 * We need to convert the system time value stored in the RX/TXSTMP registers
5562 * into a hwtstamp which can be used by the upper level timestamping functions
5563 */
5564 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5565 struct skb_shared_hwtstamps *shhwtstamps,
5566 u64 regval)
5567 {
5568 u64 ns;
5569
5570 /*
5571 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5572 * 24 to match clock shift we setup earlier.
5573 */
5574 if (adapter->hw.mac.type == e1000_82580)
5575 regval <<= IGB_82580_TSYNC_SHIFT;
5576
5577 ns = timecounter_cyc2time(&adapter->clock, regval);
5578 timecompare_update(&adapter->compare, ns);
5579 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5580 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5581 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5582 }
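/*
 * Illustrative example (82580 only): a raw RX/TXSTMP value of 0x5 is first
 * shifted left by 24 bits to line up with the cyclecounter shift mentioned
 * in the comment above, then timecounter_cyc2time() converts those cycles
 * to nanoseconds, which are exported both as the hardware timestamp and,
 * through timecompare_transform(), as an estimated system time.
 */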
5583
5584 /**
5585 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5586 * @q_vector: pointer to q_vector containing needed info
5587 * @buffer_info: pointer to igb_buffer structure
5588 *
5589 * If we were asked to do hardware stamping and such a time stamp is
5590 * available, then it must have been for this skb here because we
5591 * allow only one such packet into the queue.
5592 */
5593 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5594 {
5595 struct igb_adapter *adapter = q_vector->adapter;
5596 struct e1000_hw *hw = &adapter->hw;
5597 struct skb_shared_hwtstamps shhwtstamps;
5598 u64 regval;
5599
5600 /* if skb does not support hw timestamp or TX stamp not valid exit */
5601 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5602 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5603 return;
5604
5605 regval = rd32(E1000_TXSTMPL);
5606 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5607
5608 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5609 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5610 }
5611
5612 /**
5613 * igb_clean_tx_irq - Reclaim resources after transmit completes
5614 * @q_vector: pointer to q_vector containing needed info
5615 * returns true if ring is completely cleaned
5616 **/
5617 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5618 {
5619 struct igb_adapter *adapter = q_vector->adapter;
5620 struct igb_ring *tx_ring = q_vector->tx_ring;
5621 struct net_device *netdev = tx_ring->netdev;
5622 struct e1000_hw *hw = &adapter->hw;
5623 struct igb_buffer *buffer_info;
5624 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5625 unsigned int total_bytes = 0, total_packets = 0;
5626 unsigned int i, eop, count = 0;
5627 bool cleaned = false;
5628
5629 i = tx_ring->next_to_clean;
5630 eop = tx_ring->buffer_info[i].next_to_watch;
5631 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5632
5633 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5634 (count < tx_ring->count)) {
5635 rmb(); /* read buffer_info after eop_desc status */
5636 for (cleaned = false; !cleaned; count++) {
5637 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5638 buffer_info = &tx_ring->buffer_info[i];
5639 cleaned = (i == eop);
5640
5641 if (buffer_info->skb) {
5642 total_bytes += buffer_info->bytecount;
5643 /* gso_segs is currently only valid for tcp */
5644 total_packets += buffer_info->gso_segs;
5645 igb_tx_hwtstamp(q_vector, buffer_info);
5646 }
5647
5648 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5649 tx_desc->wb.status = 0;
5650
5651 i++;
5652 if (i == tx_ring->count)
5653 i = 0;
5654 }
5655 eop = tx_ring->buffer_info[i].next_to_watch;
5656 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5657 }
5658
5659 tx_ring->next_to_clean = i;
5660
5661 if (unlikely(count &&
5662 netif_carrier_ok(netdev) &&
5663 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5664 /* Make sure that anybody stopping the queue after this
5665 * sees the new next_to_clean.
5666 */
5667 smp_mb();
5668 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5669 !(test_bit(__IGB_DOWN, &adapter->state))) {
5670 netif_wake_subqueue(netdev, tx_ring->queue_index);
5671
5672 u64_stats_update_begin(&tx_ring->tx_syncp);
5673 tx_ring->tx_stats.restart_queue++;
5674 u64_stats_update_end(&tx_ring->tx_syncp);
5675 }
5676 }
5677
5678 if (tx_ring->detect_tx_hung) {
5679 /* Detect a transmit hang in hardware; this serializes the
5680 * check with the clearing of time_stamp and movement of i */
5681 tx_ring->detect_tx_hung = false;
5682 if (tx_ring->buffer_info[i].time_stamp &&
5683 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5684 (adapter->tx_timeout_factor * HZ)) &&
5685 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5686
5687 /* detected Tx unit hang */
5688 dev_err(tx_ring->dev,
5689 "Detected Tx Unit Hang\n"
5690 " Tx Queue <%d>\n"
5691 " TDH <%x>\n"
5692 " TDT <%x>\n"
5693 " next_to_use <%x>\n"
5694 " next_to_clean <%x>\n"
5695 "buffer_info[next_to_clean]\n"
5696 " time_stamp <%lx>\n"
5697 " next_to_watch <%x>\n"
5698 " jiffies <%lx>\n"
5699 " desc.status <%x>\n",
5700 tx_ring->queue_index,
5701 readl(tx_ring->head),
5702 readl(tx_ring->tail),
5703 tx_ring->next_to_use,
5704 tx_ring->next_to_clean,
5705 tx_ring->buffer_info[eop].time_stamp,
5706 eop,
5707 jiffies,
5708 eop_desc->wb.status);
5709 netif_stop_subqueue(netdev, tx_ring->queue_index);
5710 }
5711 }
5712 tx_ring->total_bytes += total_bytes;
5713 tx_ring->total_packets += total_packets;
5714 u64_stats_update_begin(&tx_ring->tx_syncp);
5715 tx_ring->tx_stats.bytes += total_bytes;
5716 tx_ring->tx_stats.packets += total_packets;
5717 u64_stats_update_end(&tx_ring->tx_syncp);
5718 return count < tx_ring->count;
5719 }
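/*
 * Return value note: the function reports the ring as "clean" unless the
 * loop above walked a full ring's worth of descriptors in one pass
 * (count == tx_ring->count); in that case igb_poll() treats Tx work as
 * unfinished and keeps the vector in NAPI polling mode.
 */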
5720
5721 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5722 u32 status_err, struct sk_buff *skb)
5723 {
5724 skb_checksum_none_assert(skb);
5725
5726 /* skip if the Ignore Checksum bit is set or checksum is disabled through ethtool */
5727 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5728 (status_err & E1000_RXD_STAT_IXSM))
5729 return;
5730
5731 /* TCP/UDP checksum error bit is set */
5732 if (status_err &
5733 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5734 /*
5735 * work around errata with sctp packets where the TCPE aka
5736 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5737 * packets, (aka let the stack check the crc32c)
5738 */
5739 if ((skb->len == 60) &&
5740 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5741 u64_stats_update_begin(&ring->rx_syncp);
5742 ring->rx_stats.csum_err++;
5743 u64_stats_update_end(&ring->rx_syncp);
5744 }
5745 /* let the stack verify checksum errors */
5746 return;
5747 }
5748 /* It must be a TCP or UDP packet with a valid checksum */
5749 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5750 skb->ip_summed = CHECKSUM_UNNECESSARY;
5751
5752 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5753 }
5754
5755 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5756 struct sk_buff *skb)
5757 {
5758 struct igb_adapter *adapter = q_vector->adapter;
5759 struct e1000_hw *hw = &adapter->hw;
5760 u64 regval;
5761
5762 /*
5763 * If this bit is set, then the RX registers contain the time stamp. No
5764 * other packet will be time stamped until we read these registers, so
5765 * read the registers to make them available again. Because only one
5766 * packet can be time stamped at a time, we know that the register
5767 * values must belong to this one here and therefore we don't need to
5768 * compare any of the additional attributes stored for it.
5769 *
5770 * If nothing went wrong, then it should have a shared tx_flags that we
5771 * can turn into a skb_shared_hwtstamps.
5772 */
5773 if (staterr & E1000_RXDADV_STAT_TSIP) {
5774 u32 *stamp = (u32 *)skb->data;
5775 regval = le32_to_cpu(*(stamp + 2));
5776 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5777 skb_pull(skb, IGB_TS_HDR_LEN);
5778 } else {
5779 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5780 return;
5781
5782 regval = rd32(E1000_RXSTMPL);
5783 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5784 }
5785
5786 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5787 }
5788 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5789 union e1000_adv_rx_desc *rx_desc)
5790 {
5791 /* HW will not DMA in data larger than the given buffer, even if it
5792 * parses the (NFS, of course) header to be larger. In that case, it
5793 * fills the header buffer and spills the rest into the page.
5794 */
5795 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5796 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5797 if (hlen > rx_ring->rx_buffer_len)
5798 hlen = rx_ring->rx_buffer_len;
5799 return hlen;
5800 }
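/*
 * Illustrative example (assumed sizes): with a 128 byte header buffer and
 * a full-size TCP segment, hdr_info might report, say, 66 bytes of parsed
 * headers; that is what ends up in the skb linear area, while the payload
 * is spilled into the half-page buffer and attached as a fragment by
 * igb_clean_rx_irq_adv() below.
 */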
5801
5802 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5803 int *work_done, int budget)
5804 {
5805 struct igb_ring *rx_ring = q_vector->rx_ring;
5806 struct net_device *netdev = rx_ring->netdev;
5807 struct device *dev = rx_ring->dev;
5808 union e1000_adv_rx_desc *rx_desc, *next_rxd;
5809 struct igb_buffer *buffer_info, *next_buffer;
5810 struct sk_buff *skb;
5811 bool cleaned = false;
5812 int cleaned_count = 0;
5813 int current_node = numa_node_id();
5814 unsigned int total_bytes = 0, total_packets = 0;
5815 unsigned int i;
5816 u32 staterr;
5817 u16 length;
5818
5819 i = rx_ring->next_to_clean;
5820 buffer_info = &rx_ring->buffer_info[i];
5821 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5822 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5823
5824 while (staterr & E1000_RXD_STAT_DD) {
5825 if (*work_done >= budget)
5826 break;
5827 (*work_done)++;
5828 rmb(); /* read descriptor and rx_buffer_info after status DD */
5829
5830 skb = buffer_info->skb;
5831 prefetch(skb->data - NET_IP_ALIGN);
5832 buffer_info->skb = NULL;
5833
5834 i++;
5835 if (i == rx_ring->count)
5836 i = 0;
5837
5838 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5839 prefetch(next_rxd);
5840 next_buffer = &rx_ring->buffer_info[i];
5841
5842 length = le16_to_cpu(rx_desc->wb.upper.length);
5843 cleaned = true;
5844 cleaned_count++;
5845
5846 if (buffer_info->dma) {
5847 dma_unmap_single(dev, buffer_info->dma,
5848 rx_ring->rx_buffer_len,
5849 DMA_FROM_DEVICE);
5850 buffer_info->dma = 0;
5851 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5852 skb_put(skb, length);
5853 goto send_up;
5854 }
5855 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5856 }
5857
5858 if (length) {
5859 dma_unmap_page(dev, buffer_info->page_dma,
5860 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5861 buffer_info->page_dma = 0;
5862
5863 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5864 buffer_info->page,
5865 buffer_info->page_offset,
5866 length);
5867
5868 if ((page_count(buffer_info->page) != 1) ||
5869 (page_to_nid(buffer_info->page) != current_node))
5870 buffer_info->page = NULL;
5871 else
5872 get_page(buffer_info->page);
5873
5874 skb->len += length;
5875 skb->data_len += length;
5876 skb->truesize += length;
5877 }
5878
5879 if (!(staterr & E1000_RXD_STAT_EOP)) {
5880 buffer_info->skb = next_buffer->skb;
5881 buffer_info->dma = next_buffer->dma;
5882 next_buffer->skb = skb;
5883 next_buffer->dma = 0;
5884 goto next_desc;
5885 }
5886 send_up:
5887 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5888 dev_kfree_skb_irq(skb);
5889 goto next_desc;
5890 }
5891
5892 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5893 igb_rx_hwtstamp(q_vector, staterr, skb);
5894 total_bytes += skb->len;
5895 total_packets++;
5896
5897 igb_rx_checksum_adv(rx_ring, staterr, skb);
5898
5899 skb->protocol = eth_type_trans(skb, netdev);
5900 skb_record_rx_queue(skb, rx_ring->queue_index);
5901
5902 if (staterr & E1000_RXD_STAT_VP) {
5903 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5904
5905 __vlan_hwaccel_put_tag(skb, vid);
5906 }
5907 napi_gro_receive(&q_vector->napi, skb);
5908
5909 next_desc:
5910 rx_desc->wb.upper.status_error = 0;
5911
5912 /* return some buffers to hardware, one at a time is too slow */
5913 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5914 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5915 cleaned_count = 0;
5916 }
5917
5918 /* use prefetched values */
5919 rx_desc = next_rxd;
5920 buffer_info = next_buffer;
5921 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5922 }
5923
5924 rx_ring->next_to_clean = i;
5925 cleaned_count = igb_desc_unused(rx_ring);
5926
5927 if (cleaned_count)
5928 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5929
5930 rx_ring->total_packets += total_packets;
5931 rx_ring->total_bytes += total_bytes;
5932 u64_stats_update_begin(&rx_ring->rx_syncp);
5933 rx_ring->rx_stats.packets += total_packets;
5934 rx_ring->rx_stats.bytes += total_bytes;
5935 u64_stats_update_end(&rx_ring->rx_syncp);
5936 return cleaned;
5937 }
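/*
 * Receive path recap: when rx_buffer_len >= IGB_RXBUFFER_1024 the whole
 * frame is in the skb data buffer and the code jumps straight to send_up;
 * otherwise the parsed header lands in the skb and the payload in half a
 * page, which is reused only while it is exclusively owned and NUMA-local.
 * Frames spanning several descriptors are chained by passing the partially
 * built skb along through buffer_info until the EOP descriptor arrives.
 */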
5938
5939 /**
5940 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5941 * @rx_ring: address of the receive ring to place buffers on
 * @cleaned_count: number of descriptors to refill
5942 **/
5943 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5944 {
5945 struct net_device *netdev = rx_ring->netdev;
5946 union e1000_adv_rx_desc *rx_desc;
5947 struct igb_buffer *buffer_info;
5948 struct sk_buff *skb;
5949 unsigned int i;
5950 int bufsz;
5951
5952 i = rx_ring->next_to_use;
5953 buffer_info = &rx_ring->buffer_info[i];
5954
5955 bufsz = rx_ring->rx_buffer_len;
5956
5957 while (cleaned_count--) {
5958 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5959
5960 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5961 if (!buffer_info->page) {
5962 buffer_info->page = netdev_alloc_page(netdev);
5963 if (unlikely(!buffer_info->page)) {
5964 u64_stats_update_begin(&rx_ring->rx_syncp);
5965 rx_ring->rx_stats.alloc_failed++;
5966 u64_stats_update_end(&rx_ring->rx_syncp);
5967 goto no_buffers;
5968 }
5969 buffer_info->page_offset = 0;
5970 } else {
5971 buffer_info->page_offset ^= PAGE_SIZE / 2;
5972 }
5973 buffer_info->page_dma =
5974 dma_map_page(rx_ring->dev, buffer_info->page,
5975 buffer_info->page_offset,
5976 PAGE_SIZE / 2,
5977 DMA_FROM_DEVICE);
5978 if (dma_mapping_error(rx_ring->dev,
5979 buffer_info->page_dma)) {
5980 buffer_info->page_dma = 0;
5981 u64_stats_update_begin(&rx_ring->rx_syncp);
5982 rx_ring->rx_stats.alloc_failed++;
5983 u64_stats_update_end(&rx_ring->rx_syncp);
5984 goto no_buffers;
5985 }
5986 }
5987
5988 skb = buffer_info->skb;
5989 if (!skb) {
5990 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5991 if (unlikely(!skb)) {
5992 u64_stats_update_begin(&rx_ring->rx_syncp);
5993 rx_ring->rx_stats.alloc_failed++;
5994 u64_stats_update_end(&rx_ring->rx_syncp);
5995 goto no_buffers;
5996 }
5997
5998 buffer_info->skb = skb;
5999 }
6000 if (!buffer_info->dma) {
6001 buffer_info->dma = dma_map_single(rx_ring->dev,
6002 skb->data,
6003 bufsz,
6004 DMA_FROM_DEVICE);
6005 if (dma_mapping_error(rx_ring->dev,
6006 buffer_info->dma)) {
6007 buffer_info->dma = 0;
6008 u64_stats_update_begin(&rx_ring->rx_syncp);
6009 rx_ring->rx_stats.alloc_failed++;
6010 u64_stats_update_end(&rx_ring->rx_syncp);
6011 goto no_buffers;
6012 }
6013 }
6014 /* Refresh the desc even if buffer_addrs didn't change because
6015 * each write-back erases this info. */
6016 if (bufsz < IGB_RXBUFFER_1024) {
6017 rx_desc->read.pkt_addr =
6018 cpu_to_le64(buffer_info->page_dma);
6019 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6020 } else {
6021 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6022 rx_desc->read.hdr_addr = 0;
6023 }
6024
6025 i++;
6026 if (i == rx_ring->count)
6027 i = 0;
6028 buffer_info = &rx_ring->buffer_info[i];
6029 }
6030
6031 no_buffers:
6032 if (rx_ring->next_to_use != i) {
6033 rx_ring->next_to_use = i;
6034 if (i == 0)
6035 i = (rx_ring->count - 1);
6036 else
6037 i--;
6038
6039 /* Force memory writes to complete before letting h/w
6040 * know there are new descriptors to fetch. (Only
6041 * applicable for weak-ordered memory model archs,
6042 * such as IA-64). */
6043 wmb();
6044 writel(i, rx_ring->tail);
6045 }
6046 }
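/*
 * Tail update note: next_to_use records where refilling will continue,
 * while the doorbell written to the tail register is one descriptor behind
 * it, so head and tail can never become equal with a full ring (which
 * would be indistinguishable from an empty one).  The wmb() orders the
 * descriptor writes ahead of the doorbell on weakly ordered CPUs, as the
 * comment above notes.
 */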
6047
6048 /**
6049 * igb_mii_ioctl - handle MII related ioctls
6050 * @netdev: network interface device structure
6051 * @ifr: pointer to the interface request structure
6052 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6053 **/
6054 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6055 {
6056 struct igb_adapter *adapter = netdev_priv(netdev);
6057 struct mii_ioctl_data *data = if_mii(ifr);
6058
6059 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6060 return -EOPNOTSUPP;
6061
6062 switch (cmd) {
6063 case SIOCGMIIPHY:
6064 data->phy_id = adapter->hw.phy.addr;
6065 break;
6066 case SIOCGMIIREG:
6067 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6068 &data->val_out))
6069 return -EIO;
6070 break;
6071 case SIOCSMIIREG:
6072 default:
6073 return -EOPNOTSUPP;
6074 }
6075 return 0;
6076 }
6077
6078 /**
6079 * igb_hwtstamp_ioctl - control hardware time stamping
6080 * @netdev: network interface device structure
6081 * @ifr: pointer to the interface request structure
6082 * @cmd: ioctl command (SIOCSHWTSTAMP)
6083 *
6084 * Outgoing time stamping can be enabled and disabled. Play nice and
6085 * disable it when requested, although it shouldn't cause any overhead
6086 * when no packet needs it. At most one packet in the queue may be
6087 * marked for time stamping, otherwise it would be impossible to tell
6088 * for sure to which packet the hardware time stamp belongs.
6089 *
6090 * Incoming time stamping has to be configured via the hardware
6091 * filters. Not all combinations are supported, in particular event
6092 * type has to be specified. Matching the kind of event packet is
6093 * not supported, with the exception of "all V2 events regardless of
6094 * level 2 or 4".
6095 *
6096 **/
6097 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6098 struct ifreq *ifr, int cmd)
6099 {
6100 struct igb_adapter *adapter = netdev_priv(netdev);
6101 struct e1000_hw *hw = &adapter->hw;
6102 struct hwtstamp_config config;
6103 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6104 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6105 u32 tsync_rx_cfg = 0;
6106 bool is_l4 = false;
6107 bool is_l2 = false;
6108 u32 regval;
6109
6110 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6111 return -EFAULT;
6112
6113 /* reserved for future extensions */
6114 if (config.flags)
6115 return -EINVAL;
6116
6117 switch (config.tx_type) {
6118 case HWTSTAMP_TX_OFF:
6119 tsync_tx_ctl = 0;
6120 case HWTSTAMP_TX_ON:
6121 break;
6122 default:
6123 return -ERANGE;
6124 }
6125
6126 switch (config.rx_filter) {
6127 case HWTSTAMP_FILTER_NONE:
6128 tsync_rx_ctl = 0;
6129 break;
6130 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6131 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6132 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6133 case HWTSTAMP_FILTER_ALL:
6134 /*
6135 * register TSYNCRXCFG must be set, therefore it is not
6136 * possible to time stamp both Sync and Delay_Req messages
6137 * => fall back to time stamping all packets
6138 */
6139 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6140 config.rx_filter = HWTSTAMP_FILTER_ALL;
6141 break;
6142 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6143 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6144 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6145 is_l4 = true;
6146 break;
6147 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6148 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6149 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6150 is_l4 = true;
6151 break;
6152 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6153 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6154 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6155 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6156 is_l2 = true;
6157 is_l4 = true;
6158 config.rx_filter = HWTSTAMP_FILTER_SOME;
6159 break;
6160 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6161 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6162 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6163 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6164 is_l2 = true;
6165 is_l4 = true;
6166 config.rx_filter = HWTSTAMP_FILTER_SOME;
6167 break;
6168 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6169 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6170 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6171 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6172 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6173 is_l2 = true;
6174 break;
6175 default:
6176 return -ERANGE;
6177 }
6178
6179 if (hw->mac.type == e1000_82575) {
6180 if (tsync_rx_ctl | tsync_tx_ctl)
6181 return -EINVAL;
6182 return 0;
6183 }
6184
6185 /*
6186 * Per-packet timestamping only works if all packets are
6187 * timestamped, so enable timestamping in all packets as
6188 * long as one rx filter was configured.
6189 */
6190 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6191 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6192 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6193 }
6194
6195 /* enable/disable TX */
6196 regval = rd32(E1000_TSYNCTXCTL);
6197 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6198 regval |= tsync_tx_ctl;
6199 wr32(E1000_TSYNCTXCTL, regval);
6200
6201 /* enable/disable RX */
6202 regval = rd32(E1000_TSYNCRXCTL);
6203 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6204 regval |= tsync_rx_ctl;
6205 wr32(E1000_TSYNCRXCTL, regval);
6206
6207 /* define which PTP packets are time stamped */
6208 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6209
6210 /* define ethertype filter for timestamped packets */
6211 if (is_l2)
6212 wr32(E1000_ETQF(3),
6213 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6214 E1000_ETQF_1588 | /* enable timestamping */
6215 ETH_P_1588)); /* 1588 eth protocol type */
6216 else
6217 wr32(E1000_ETQF(3), 0);
6218
6219 #define PTP_PORT 319
6220 /* L4 Queue Filter[3]: filter by destination port and protocol */
6221 if (is_l4) {
6222 u32 ftqf = (IPPROTO_UDP /* UDP */
6223 | E1000_FTQF_VF_BP /* VF not compared */
6224 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6225 | E1000_FTQF_MASK); /* mask all inputs */
6226 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6227
6228 wr32(E1000_IMIR(3), htons(PTP_PORT));
6229 wr32(E1000_IMIREXT(3),
6230 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6231 if (hw->mac.type == e1000_82576) {
6232 /* enable source port check */
6233 wr32(E1000_SPQF(3), htons(PTP_PORT));
6234 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6235 }
6236 wr32(E1000_FTQF(3), ftqf);
6237 } else {
6238 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6239 }
6240 wrfl();
6241
6242 adapter->hwtstamp_config = config;
6243
6244 /* clear TX/RX time stamp registers, just to be sure */
6245 regval = rd32(E1000_TXSTMPH);
6246 regval = rd32(E1000_RXSTMPH);
6247
6248 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6249 -EFAULT : 0;
6250 }
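/*
 * Illustrative userspace sketch (not part of the driver): requesting Tx
 * timestamps and all-packet Rx timestamps through SIOCSHWTSTAMP.  The
 * interface name "eth0" is an assumption and error handling is omitted.
 *
 *	struct hwtstamp_config cfg = {0};
 *	struct ifreq ifr = {0};
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_ALL;
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter holds the filter the driver actually applied,
 * e.g. HWTSTAMP_FILTER_SOME when a single PTP V2 message type was
 * requested, as adjusted in the switch above.
 */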
6251
6252 /**
6253 * igb_ioctl - dispatch driver specific ioctls
6254 * @netdev: network interface device structure
6255 * @ifr: pointer to the interface request structure
6256 * @cmd: ioctl command
6257 **/
6258 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6259 {
6260 switch (cmd) {
6261 case SIOCGMIIPHY:
6262 case SIOCGMIIREG:
6263 case SIOCSMIIREG:
6264 return igb_mii_ioctl(netdev, ifr, cmd);
6265 case SIOCSHWTSTAMP:
6266 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6267 default:
6268 return -EOPNOTSUPP;
6269 }
6270 }
6271
6272 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6273 {
6274 struct igb_adapter *adapter = hw->back;
6275 u16 cap_offset;
6276
6277 cap_offset = adapter->pdev->pcie_cap;
6278 if (!cap_offset)
6279 return -E1000_ERR_CONFIG;
6280
6281 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6282
6283 return 0;
6284 }
6285
6286 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6287 {
6288 struct igb_adapter *adapter = hw->back;
6289 u16 cap_offset;
6290
6291 cap_offset = adapter->pdev->pcie_cap;
6292 if (!cap_offset)
6293 return -E1000_ERR_CONFIG;
6294
6295 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6296
6297 return 0;
6298 }
6299
6300 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6301 {
6302 struct igb_adapter *adapter = netdev_priv(netdev);
6303 struct e1000_hw *hw = &adapter->hw;
6304 u32 ctrl, rctl;
6305
6306 igb_irq_disable(adapter);
6307
6308 if (features & NETIF_F_HW_VLAN_RX) {
6309 /* enable VLAN tag insert/strip */
6310 ctrl = rd32(E1000_CTRL);
6311 ctrl |= E1000_CTRL_VME;
6312 wr32(E1000_CTRL, ctrl);
6313
6314 /* Disable CFI check */
6315 rctl = rd32(E1000_RCTL);
6316 rctl &= ~E1000_RCTL_CFIEN;
6317 wr32(E1000_RCTL, rctl);
6318 } else {
6319 /* disable VLAN tag insert/strip */
6320 ctrl = rd32(E1000_CTRL);
6321 ctrl &= ~E1000_CTRL_VME;
6322 wr32(E1000_CTRL, ctrl);
6323 }
6324
6325 igb_rlpml_set(adapter);
6326
6327 if (!test_bit(__IGB_DOWN, &adapter->state))
6328 igb_irq_enable(adapter);
6329 }
6330
6331 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6332 {
6333 struct igb_adapter *adapter = netdev_priv(netdev);
6334 struct e1000_hw *hw = &adapter->hw;
6335 int pf_id = adapter->vfs_allocated_count;
6336
6337 /* attempt to add filter to vlvf array */
6338 igb_vlvf_set(adapter, vid, true, pf_id);
6339
6340 /* add the filter since PF can receive vlans w/o entry in vlvf */
6341 igb_vfta_set(hw, vid, true);
6342
6343 set_bit(vid, adapter->active_vlans);
6344 }
6345
6346 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6347 {
6348 struct igb_adapter *adapter = netdev_priv(netdev);
6349 struct e1000_hw *hw = &adapter->hw;
6350 int pf_id = adapter->vfs_allocated_count;
6351 s32 err;
6352
6353 igb_irq_disable(adapter);
6354
6355 if (!test_bit(__IGB_DOWN, &adapter->state))
6356 igb_irq_enable(adapter);
6357
6358 /* remove vlan from VLVF table array */
6359 err = igb_vlvf_set(adapter, vid, false, pf_id);
6360
6361 /* if vid was not present in VLVF just remove it from table */
6362 if (err)
6363 igb_vfta_set(hw, vid, false);
6364
6365 clear_bit(vid, adapter->active_vlans);
6366 }
6367
6368 static void igb_restore_vlan(struct igb_adapter *adapter)
6369 {
6370 u16 vid;
6371
6372 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6373 igb_vlan_rx_add_vid(adapter->netdev, vid);
6374 }
6375
6376 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6377 {
6378 struct pci_dev *pdev = adapter->pdev;
6379 struct e1000_mac_info *mac = &adapter->hw.mac;
6380
6381 mac->autoneg = 0;
6382
6383 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6384 * for the switch() below to work */
6385 if ((spd & 1) || (dplx & ~1))
6386 goto err_inval;
6387
6388 /* Fiber NICs only allow 1000 Mbps Full duplex */
6389 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6390 (spd != SPEED_1000 ||
6391 dplx != DUPLEX_FULL))
6392 goto err_inval;
6393
6394 switch (spd + dplx) {
6395 case SPEED_10 + DUPLEX_HALF:
6396 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6397 break;
6398 case SPEED_10 + DUPLEX_FULL:
6399 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6400 break;
6401 case SPEED_100 + DUPLEX_HALF:
6402 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6403 break;
6404 case SPEED_100 + DUPLEX_FULL:
6405 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6406 break;
6407 case SPEED_1000 + DUPLEX_FULL:
6408 mac->autoneg = 1;
6409 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6410 break;
6411 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6412 default:
6413 goto err_inval;
6414 }
6415 return 0;
6416
6417 err_inval:
6418 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6419 return -EINVAL;
6420 }
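/*
 * Encoding note: the switch above works because the ethtool SPEED_* and
 * DUPLEX_* constants never collide when summed, e.g. SPEED_100 (100) +
 * DUPLEX_FULL (1) = 101 is distinct from SPEED_100 + DUPLEX_HALF (100)
 * and from every other supported combination, so one integer uniquely
 * identifies the requested forced link setting.
 */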
6421
6422 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6423 {
6424 struct net_device *netdev = pci_get_drvdata(pdev);
6425 struct igb_adapter *adapter = netdev_priv(netdev);
6426 struct e1000_hw *hw = &adapter->hw;
6427 u32 ctrl, rctl, status;
6428 u32 wufc = adapter->wol;
6429 #ifdef CONFIG_PM
6430 int retval = 0;
6431 #endif
6432
6433 netif_device_detach(netdev);
6434
6435 if (netif_running(netdev))
6436 igb_close(netdev);
6437
6438 igb_clear_interrupt_scheme(adapter);
6439
6440 #ifdef CONFIG_PM
6441 retval = pci_save_state(pdev);
6442 if (retval)
6443 return retval;
6444 #endif
6445
6446 status = rd32(E1000_STATUS);
6447 if (status & E1000_STATUS_LU)
6448 wufc &= ~E1000_WUFC_LNKC;
6449
6450 if (wufc) {
6451 igb_setup_rctl(adapter);
6452 igb_set_rx_mode(netdev);
6453
6454 /* turn on all-multi mode if wake on multicast is enabled */
6455 if (wufc & E1000_WUFC_MC) {
6456 rctl = rd32(E1000_RCTL);
6457 rctl |= E1000_RCTL_MPE;
6458 wr32(E1000_RCTL, rctl);
6459 }
6460
6461 ctrl = rd32(E1000_CTRL);
6462 /* advertise wake from D3Cold */
6463 #define E1000_CTRL_ADVD3WUC 0x00100000
6464 /* phy power management enable */
6465 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6466 ctrl |= E1000_CTRL_ADVD3WUC;
6467 wr32(E1000_CTRL, ctrl);
6468
6469 /* Allow time for pending master requests to run */
6470 igb_disable_pcie_master(hw);
6471
6472 wr32(E1000_WUC, E1000_WUC_PME_EN);
6473 wr32(E1000_WUFC, wufc);
6474 } else {
6475 wr32(E1000_WUC, 0);
6476 wr32(E1000_WUFC, 0);
6477 }
6478
6479 *enable_wake = wufc || adapter->en_mng_pt;
6480 if (!*enable_wake)
6481 igb_power_down_link(adapter);
6482 else
6483 igb_power_up_link(adapter);
6484
6485 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6486 * would have already happened in close and is redundant. */
6487 igb_release_hw_control(adapter);
6488
6489 pci_disable_device(pdev);
6490
6491 return 0;
6492 }
6493
6494 #ifdef CONFIG_PM
6495 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6496 {
6497 int retval;
6498 bool wake;
6499
6500 retval = __igb_shutdown(pdev, &wake);
6501 if (retval)
6502 return retval;
6503
6504 if (wake) {
6505 pci_prepare_to_sleep(pdev);
6506 } else {
6507 pci_wake_from_d3(pdev, false);
6508 pci_set_power_state(pdev, PCI_D3hot);
6509 }
6510
6511 return 0;
6512 }
6513
6514 static int igb_resume(struct pci_dev *pdev)
6515 {
6516 struct net_device *netdev = pci_get_drvdata(pdev);
6517 struct igb_adapter *adapter = netdev_priv(netdev);
6518 struct e1000_hw *hw = &adapter->hw;
6519 u32 err;
6520
6521 pci_set_power_state(pdev, PCI_D0);
6522 pci_restore_state(pdev);
6523 pci_save_state(pdev);
6524
6525 err = pci_enable_device_mem(pdev);
6526 if (err) {
6527 dev_err(&pdev->dev,
6528 "igb: Cannot enable PCI device from suspend\n");
6529 return err;
6530 }
6531 pci_set_master(pdev);
6532
6533 pci_enable_wake(pdev, PCI_D3hot, 0);
6534 pci_enable_wake(pdev, PCI_D3cold, 0);
6535
6536 if (igb_init_interrupt_scheme(adapter)) {
6537 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6538 return -ENOMEM;
6539 }
6540
6541 igb_reset(adapter);
6542
6543 /* let the f/w know that the h/w is now under the control of the
6544 * driver. */
6545 igb_get_hw_control(adapter);
6546
6547 wr32(E1000_WUS, ~0);
6548
6549 if (netif_running(netdev)) {
6550 err = igb_open(netdev);
6551 if (err)
6552 return err;
6553 }
6554
6555 netif_device_attach(netdev);
6556
6557 return 0;
6558 }
6559 #endif
6560
6561 static void igb_shutdown(struct pci_dev *pdev)
6562 {
6563 bool wake;
6564
6565 __igb_shutdown(pdev, &wake);
6566
6567 if (system_state == SYSTEM_POWER_OFF) {
6568 pci_wake_from_d3(pdev, wake);
6569 pci_set_power_state(pdev, PCI_D3hot);
6570 }
6571 }
6572
6573 #ifdef CONFIG_NET_POLL_CONTROLLER
6574 /*
6575 * Polling 'interrupt' - used by things like netconsole to send skbs
6576 * without having to re-enable interrupts. It's not called while
6577 * the interrupt routine is executing.
6578 */
6579 static void igb_netpoll(struct net_device *netdev)
6580 {
6581 struct igb_adapter *adapter = netdev_priv(netdev);
6582 struct e1000_hw *hw = &adapter->hw;
6583 int i;
6584
6585 if (!adapter->msix_entries) {
6586 struct igb_q_vector *q_vector = adapter->q_vector[0];
6587 igb_irq_disable(adapter);
6588 napi_schedule(&q_vector->napi);
6589 return;
6590 }
6591
6592 for (i = 0; i < adapter->num_q_vectors; i++) {
6593 struct igb_q_vector *q_vector = adapter->q_vector[i];
6594 wr32(E1000_EIMC, q_vector->eims_value);
6595 napi_schedule(&q_vector->napi);
6596 }
6597 }
6598 #endif /* CONFIG_NET_POLL_CONTROLLER */
6599
6600 /**
6601 * igb_io_error_detected - called when PCI error is detected
6602 * @pdev: Pointer to PCI device
6603 * @state: The current pci connection state
6604 *
6605 * This function is called after a PCI bus error affecting
6606 * this device has been detected.
6607 */
6608 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6609 pci_channel_state_t state)
6610 {
6611 struct net_device *netdev = pci_get_drvdata(pdev);
6612 struct igb_adapter *adapter = netdev_priv(netdev);
6613
6614 netif_device_detach(netdev);
6615
6616 if (state == pci_channel_io_perm_failure)
6617 return PCI_ERS_RESULT_DISCONNECT;
6618
6619 if (netif_running(netdev))
6620 igb_down(adapter);
6621 pci_disable_device(pdev);
6622
6623 /* Request a slot reset. */
6624 return PCI_ERS_RESULT_NEED_RESET;
6625 }
6626
6627 /**
6628 * igb_io_slot_reset - called after the pci bus has been reset.
6629 * @pdev: Pointer to PCI device
6630 *
6631 * Restart the card from scratch, as if from a cold-boot. Implementation
6632 * resembles the first-half of the igb_resume routine.
6633 */
6634 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6635 {
6636 struct net_device *netdev = pci_get_drvdata(pdev);
6637 struct igb_adapter *adapter = netdev_priv(netdev);
6638 struct e1000_hw *hw = &adapter->hw;
6639 pci_ers_result_t result;
6640 int err;
6641
6642 if (pci_enable_device_mem(pdev)) {
6643 dev_err(&pdev->dev,
6644 "Cannot re-enable PCI device after reset.\n");
6645 result = PCI_ERS_RESULT_DISCONNECT;
6646 } else {
6647 pci_set_master(pdev);
6648 pci_restore_state(pdev);
6649 pci_save_state(pdev);
6650
6651 pci_enable_wake(pdev, PCI_D3hot, 0);
6652 pci_enable_wake(pdev, PCI_D3cold, 0);
6653
6654 igb_reset(adapter);
6655 wr32(E1000_WUS, ~0);
6656 result = PCI_ERS_RESULT_RECOVERED;
6657 }
6658
6659 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6660 if (err) {
6661 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6662 "failed 0x%0x\n", err);
6663 /* non-fatal, continue */
6664 }
6665
6666 return result;
6667 }
6668
6669 /**
6670 * igb_io_resume - called when traffic can start flowing again.
6671 * @pdev: Pointer to PCI device
6672 *
6673 * This callback is called when the error recovery driver tells us that
6674 * its OK to resume normal operation. Implementation resembles the
6675 * second-half of the igb_resume routine.
6676 */
6677 static void igb_io_resume(struct pci_dev *pdev)
6678 {
6679 struct net_device *netdev = pci_get_drvdata(pdev);
6680 struct igb_adapter *adapter = netdev_priv(netdev);
6681
6682 if (netif_running(netdev)) {
6683 if (igb_up(adapter)) {
6684 dev_err(&pdev->dev, "igb_up failed after reset\n");
6685 return;
6686 }
6687 }
6688
6689 netif_device_attach(netdev);
6690
6691 /* let the f/w know that the h/w is now under the control of the
6692 * driver. */
6693 igb_get_hw_control(adapter);
6694 }
6695
6696 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6697 u8 qsel)
6698 {
6699 u32 rar_low, rar_high;
6700 struct e1000_hw *hw = &adapter->hw;
6701
6702 /* HW expects these in little endian so we reverse the byte order
6703 * from network order (big endian) to little endian
6704 */
6705 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6706 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6707 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6708
6709 /* Indicate to hardware the Address is Valid. */
6710 rar_high |= E1000_RAH_AV;
6711
6712 if (hw->mac.type == e1000_82575)
6713 rar_high |= E1000_RAH_POOL_1 * qsel;
6714 else
6715 rar_high |= E1000_RAH_POOL_1 << qsel;
6716
6717 wr32(E1000_RAL(index), rar_low);
6718 wrfl();
6719 wr32(E1000_RAH(index), rar_high);
6720 wrfl();
6721 }
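/*
 * Worked example: for the MAC address 00:11:22:33:44:55 the code above
 * builds rar_low = 0x33221100 and rar_high = 0x00005544 before OR-ing in
 * E1000_RAH_AV and the pool selection bits, which is exactly the little
 * endian layout the RAL/RAH register pair expects.
 */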
6722
6723 static int igb_set_vf_mac(struct igb_adapter *adapter,
6724 int vf, unsigned char *mac_addr)
6725 {
6726 struct e1000_hw *hw = &adapter->hw;
6727 /* VF MAC addresses start at the end of the receive address registers
6728 * and move towards the first; as a result a collision should not be possible */
6729 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6730
6731 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6732
6733 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6734
6735 return 0;
6736 }
6737
6738 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6739 {
6740 struct igb_adapter *adapter = netdev_priv(netdev);
6741 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6742 return -EINVAL;
6743 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6744 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6745 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6746 " change effective.");
6747 if (test_bit(__IGB_DOWN, &adapter->state)) {
6748 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6749 " but the PF device is not up.\n");
6750 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6751 " attempting to use the VF device.\n");
6752 }
6753 return igb_set_vf_mac(adapter, vf, mac);
6754 }
6755
6756 static int igb_link_mbps(int internal_link_speed)
6757 {
6758 switch (internal_link_speed) {
6759 case SPEED_100:
6760 return 100;
6761 case SPEED_1000:
6762 return 1000;
6763 default:
6764 return 0;
6765 }
6766 }
6767
6768 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6769 int link_speed)
6770 {
6771 int rf_dec, rf_int;
6772 u32 bcnrc_val;
6773
6774 if (tx_rate != 0) {
6775 /* Calculate the rate factor values to set */
6776 rf_int = link_speed / tx_rate;
6777 rf_dec = (link_speed - (rf_int * tx_rate));
6778 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6779
6780 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6781 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6782 E1000_RTTBCNRC_RF_INT_MASK);
6783 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6784 } else {
6785 bcnrc_val = 0;
6786 }
6787
6788 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6789 wr32(E1000_RTTBCNRC, bcnrc_val);
6790 }
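/*
 * Worked example: on a 1000 Mbps link with tx_rate = 250, rf_int is
 * 1000 / 250 = 4 and rf_dec is 0, i.e. the value programmed into RTTBCNRC
 * is the fixed point ratio link_speed / tx_rate (4.0 here).  For a rate
 * that does not divide evenly, say 300 Mbps, rf_int is 3 and the remaining
 * 100/300 is carried in rf_dec scaled by the RF_INT shift.
 */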
6791
6792 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6793 {
6794 int actual_link_speed, i;
6795 bool reset_rate = false;
6796
6797 /* VF TX rate limit was not set or not supported */
6798 if ((adapter->vf_rate_link_speed == 0) ||
6799 (adapter->hw.mac.type != e1000_82576))
6800 return;
6801
6802 actual_link_speed = igb_link_mbps(adapter->link_speed);
6803 if (actual_link_speed != adapter->vf_rate_link_speed) {
6804 reset_rate = true;
6805 adapter->vf_rate_link_speed = 0;
6806 dev_info(&adapter->pdev->dev,
6807 "Link speed has been changed. VF Transmit "
6808 "rate is disabled\n");
6809 }
6810
6811 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6812 if (reset_rate)
6813 adapter->vf_data[i].tx_rate = 0;
6814
6815 igb_set_vf_rate_limit(&adapter->hw, i,
6816 adapter->vf_data[i].tx_rate,
6817 actual_link_speed);
6818 }
6819 }
6820
6821 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6822 {
6823 struct igb_adapter *adapter = netdev_priv(netdev);
6824 struct e1000_hw *hw = &adapter->hw;
6825 int actual_link_speed;
6826
6827 if (hw->mac.type != e1000_82576)
6828 return -EOPNOTSUPP;
6829
6830 actual_link_speed = igb_link_mbps(adapter->link_speed);
6831 if ((vf >= adapter->vfs_allocated_count) ||
6832 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6833 (tx_rate < 0) || (tx_rate > actual_link_speed))
6834 return -EINVAL;
6835
6836 adapter->vf_rate_link_speed = actual_link_speed;
6837 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6838 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6839
6840 return 0;
6841 }
6842
6843 static int igb_ndo_get_vf_config(struct net_device *netdev,
6844 int vf, struct ifla_vf_info *ivi)
6845 {
6846 struct igb_adapter *adapter = netdev_priv(netdev);
6847 if (vf >= adapter->vfs_allocated_count)
6848 return -EINVAL;
6849 ivi->vf = vf;
6850 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6851 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6852 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6853 ivi->qos = adapter->vf_data[vf].pf_qos;
6854 return 0;
6855 }
6856
6857 static void igb_vmm_control(struct igb_adapter *adapter)
6858 {
6859 struct e1000_hw *hw = &adapter->hw;
6860 u32 reg;
6861
6862 switch (hw->mac.type) {
6863 case e1000_82575:
6864 default:
6865 /* replication is not supported for 82575 */
6866 return;
6867 case e1000_82576:
6868 /* notify HW that the MAC is adding vlan tags */
6869 reg = rd32(E1000_DTXCTL);
6870 reg |= E1000_DTXCTL_VLAN_ADDED;
6871 wr32(E1000_DTXCTL, reg);
6872 case e1000_82580:
6873 /* enable replication vlan tag stripping */
6874 reg = rd32(E1000_RPLOLR);
6875 reg |= E1000_RPLOLR_STRVLAN;
6876 wr32(E1000_RPLOLR, reg);
6877 case e1000_i350:
6878 /* none of the above registers are supported by i350 */
6879 break;
6880 }
6881
6882 if (adapter->vfs_allocated_count) {
6883 igb_vmdq_set_loopback_pf(hw, true);
6884 igb_vmdq_set_replication_pf(hw, true);
6885 igb_vmdq_set_anti_spoofing_pf(hw, true,
6886 adapter->vfs_allocated_count);
6887 } else {
6888 igb_vmdq_set_loopback_pf(hw, false);
6889 igb_vmdq_set_replication_pf(hw, false);
6890 }
6891 }
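/*
 * Fall-through note: the switch above intentionally omits breaks, so the
 * 82576 case also picks up the RPLOLR vlan-strip setting applied for
 * 82580, while i350 needs neither DTXCTL nor RPLOLR (per the comment) and
 * relies only on the loopback/replication/anti-spoofing setup below.
 */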
6892
6893 /* igb_main.c */