drivers/net/ethernet/intel/igb/igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2012 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61
62 #define MAJ 3
63 #define MIN 4
64 #define BUILD 7
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
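/* With the MAJ/MIN/BUILD values above, DRV_VERSION expands to "3.4.7-k". */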
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
75 };
76
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
103 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
104 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
105 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
106 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
107 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
108 /* required last entry */
109 {0, }
110 };
111
112 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
113
114 void igb_reset(struct igb_adapter *);
115 static int igb_setup_all_tx_resources(struct igb_adapter *);
116 static int igb_setup_all_rx_resources(struct igb_adapter *);
117 static void igb_free_all_tx_resources(struct igb_adapter *);
118 static void igb_free_all_rx_resources(struct igb_adapter *);
119 static void igb_setup_mrqc(struct igb_adapter *);
120 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
121 static void __devexit igb_remove(struct pci_dev *pdev);
122 static int igb_sw_init(struct igb_adapter *);
123 static int igb_open(struct net_device *);
124 static int igb_close(struct net_device *);
125 static void igb_configure_tx(struct igb_adapter *);
126 static void igb_configure_rx(struct igb_adapter *);
127 static void igb_clean_all_tx_rings(struct igb_adapter *);
128 static void igb_clean_all_rx_rings(struct igb_adapter *);
129 static void igb_clean_tx_ring(struct igb_ring *);
130 static void igb_clean_rx_ring(struct igb_ring *);
131 static void igb_set_rx_mode(struct net_device *);
132 static void igb_update_phy_info(unsigned long);
133 static void igb_watchdog(unsigned long);
134 static void igb_watchdog_task(struct work_struct *);
135 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
136 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
137 struct rtnl_link_stats64 *stats);
138 static int igb_change_mtu(struct net_device *, int);
139 static int igb_set_mac(struct net_device *, void *);
140 static void igb_set_uta(struct igb_adapter *adapter);
141 static irqreturn_t igb_intr(int irq, void *);
142 static irqreturn_t igb_intr_msi(int irq, void *);
143 static irqreturn_t igb_msix_other(int irq, void *);
144 static irqreturn_t igb_msix_ring(int irq, void *);
145 #ifdef CONFIG_IGB_DCA
146 static void igb_update_dca(struct igb_q_vector *);
147 static void igb_setup_dca(struct igb_adapter *);
148 #endif /* CONFIG_IGB_DCA */
149 static int igb_poll(struct napi_struct *, int);
150 static bool igb_clean_tx_irq(struct igb_q_vector *);
151 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
152 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
153 static void igb_tx_timeout(struct net_device *);
154 static void igb_reset_task(struct work_struct *);
155 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
156 static int igb_vlan_rx_add_vid(struct net_device *, u16);
157 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
158 static void igb_restore_vlan(struct igb_adapter *);
159 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
160 static void igb_ping_all_vfs(struct igb_adapter *);
161 static void igb_msg_task(struct igb_adapter *);
162 static void igb_vmm_control(struct igb_adapter *);
163 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
164 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
165 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
166 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
167 int vf, u16 vlan, u8 qos);
168 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
169 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
170 struct ifla_vf_info *ivi);
171 static void igb_check_vf_rate_limit(struct igb_adapter *);
172
173 #ifdef CONFIG_PCI_IOV
174 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
175 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
176 static int igb_check_vf_assignment(struct igb_adapter *adapter);
177 #endif
178
179 #ifdef CONFIG_PM
180 #ifdef CONFIG_PM_SLEEP
181 static int igb_suspend(struct device *);
182 #endif
183 static int igb_resume(struct device *);
184 #ifdef CONFIG_PM_RUNTIME
185 static int igb_runtime_suspend(struct device *dev);
186 static int igb_runtime_resume(struct device *dev);
187 static int igb_runtime_idle(struct device *dev);
188 #endif
189 static const struct dev_pm_ops igb_pm_ops = {
190 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
191 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
192 igb_runtime_idle)
193 };
194 #endif
195 static void igb_shutdown(struct pci_dev *);
196 #ifdef CONFIG_IGB_DCA
197 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
198 static struct notifier_block dca_notifier = {
199 .notifier_call = igb_notify_dca,
200 .next = NULL,
201 .priority = 0
202 };
203 #endif
204 #ifdef CONFIG_NET_POLL_CONTROLLER
205 /* for netdump / net console */
206 static void igb_netpoll(struct net_device *);
207 #endif
208 #ifdef CONFIG_PCI_IOV
209 static unsigned int max_vfs = 0;
210 module_param(max_vfs, uint, 0);
211 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
212 "per physical function");
213 #endif /* CONFIG_PCI_IOV */
214
215 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
216 pci_channel_state_t);
217 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
218 static void igb_io_resume(struct pci_dev *);
219
220 static struct pci_error_handlers igb_err_handler = {
221 .error_detected = igb_io_error_detected,
222 .slot_reset = igb_io_slot_reset,
223 .resume = igb_io_resume,
224 };
225
226 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
227
228 static struct pci_driver igb_driver = {
229 .name = igb_driver_name,
230 .id_table = igb_pci_tbl,
231 .probe = igb_probe,
232 .remove = __devexit_p(igb_remove),
233 #ifdef CONFIG_PM
234 .driver.pm = &igb_pm_ops,
235 #endif
236 .shutdown = igb_shutdown,
237 .err_handler = &igb_err_handler
238 };
239
240 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
241 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
242 MODULE_LICENSE("GPL");
243 MODULE_VERSION(DRV_VERSION);
244
245 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
246 static int debug = -1;
247 module_param(debug, int, 0);
248 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
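/* Example usage (hypothetical command line): "modprobe igb debug=16 max_vfs=2"
 * enables all debug message categories and, on CONFIG_PCI_IOV kernels,
 * requests up to two virtual functions per physical function.
 */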
249
250 struct igb_reg_info {
251 u32 ofs;
252 char *name;
253 };
254
255 static const struct igb_reg_info igb_reg_info_tbl[] = {
256
257 /* General Registers */
258 {E1000_CTRL, "CTRL"},
259 {E1000_STATUS, "STATUS"},
260 {E1000_CTRL_EXT, "CTRL_EXT"},
261
262 /* Interrupt Registers */
263 {E1000_ICR, "ICR"},
264
265 /* RX Registers */
266 {E1000_RCTL, "RCTL"},
267 {E1000_RDLEN(0), "RDLEN"},
268 {E1000_RDH(0), "RDH"},
269 {E1000_RDT(0), "RDT"},
270 {E1000_RXDCTL(0), "RXDCTL"},
271 {E1000_RDBAL(0), "RDBAL"},
272 {E1000_RDBAH(0), "RDBAH"},
273
274 /* TX Registers */
275 {E1000_TCTL, "TCTL"},
276 {E1000_TDBAL(0), "TDBAL"},
277 {E1000_TDBAH(0), "TDBAH"},
278 {E1000_TDLEN(0), "TDLEN"},
279 {E1000_TDH(0), "TDH"},
280 {E1000_TDT(0), "TDT"},
281 {E1000_TXDCTL(0), "TXDCTL"},
282 {E1000_TDFH, "TDFH"},
283 {E1000_TDFT, "TDFT"},
284 {E1000_TDFHS, "TDFHS"},
285 {E1000_TDFPC, "TDFPC"},
286
287 /* List Terminator */
288 {}
289 };
290
291 /*
292 * igb_regdump - register printout routine
293 */
294 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
295 {
296 int n = 0;
297 char rname[16];
298 u32 regs[8];
299
300 switch (reginfo->ofs) {
301 case E1000_RDLEN(0):
302 for (n = 0; n < 4; n++)
303 regs[n] = rd32(E1000_RDLEN(n));
304 break;
305 case E1000_RDH(0):
306 for (n = 0; n < 4; n++)
307 regs[n] = rd32(E1000_RDH(n));
308 break;
309 case E1000_RDT(0):
310 for (n = 0; n < 4; n++)
311 regs[n] = rd32(E1000_RDT(n));
312 break;
313 case E1000_RXDCTL(0):
314 for (n = 0; n < 4; n++)
315 regs[n] = rd32(E1000_RXDCTL(n));
316 break;
317 case E1000_RDBAL(0):
318 for (n = 0; n < 4; n++)
319 regs[n] = rd32(E1000_RDBAL(n));
320 break;
321 case E1000_RDBAH(0):
322 for (n = 0; n < 4; n++)
323 regs[n] = rd32(E1000_RDBAH(n));
324 break;
325 case E1000_TDBAL(0):
326 for (n = 0; n < 4; n++)
327 regs[n] = rd32(E1000_TDBAL(n));
328 break;
329 case E1000_TDBAH(0):
330 for (n = 0; n < 4; n++)
331 regs[n] = rd32(E1000_TDBAH(n));
332 break;
333 case E1000_TDLEN(0):
334 for (n = 0; n < 4; n++)
335 regs[n] = rd32(E1000_TDLEN(n));
336 break;
337 case E1000_TDH(0):
338 for (n = 0; n < 4; n++)
339 regs[n] = rd32(E1000_TDH(n));
340 break;
341 case E1000_TDT(0):
342 for (n = 0; n < 4; n++)
343 regs[n] = rd32(E1000_TDT(n));
344 break;
345 case E1000_TXDCTL(0):
346 for (n = 0; n < 4; n++)
347 regs[n] = rd32(E1000_TXDCTL(n));
348 break;
349 default:
350 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
351 return;
352 }
353
354 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
355 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
356 regs[2], regs[3]);
357 }
358
359 /*
360 * igb_dump - Print registers, tx-rings and rx-rings
361 */
362 static void igb_dump(struct igb_adapter *adapter)
363 {
364 struct net_device *netdev = adapter->netdev;
365 struct e1000_hw *hw = &adapter->hw;
366 struct igb_reg_info *reginfo;
367 struct igb_ring *tx_ring;
368 union e1000_adv_tx_desc *tx_desc;
369 struct my_u0 { u64 a; u64 b; } *u0;
370 struct igb_ring *rx_ring;
371 union e1000_adv_rx_desc *rx_desc;
372 u32 staterr;
373 u16 i, n;
374
375 if (!netif_msg_hw(adapter))
376 return;
377
378 /* Print netdevice Info */
379 if (netdev) {
380 dev_info(&adapter->pdev->dev, "Net device Info\n");
381 pr_info("Device Name state trans_start "
382 "last_rx\n");
383 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
384 netdev->state, netdev->trans_start, netdev->last_rx);
385 }
386
387 /* Print Registers */
388 dev_info(&adapter->pdev->dev, "Register Dump\n");
389 pr_info(" Register Name Value\n");
390 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
391 reginfo->name; reginfo++) {
392 igb_regdump(hw, reginfo);
393 }
394
395 /* Print TX Ring Summary */
396 if (!netdev || !netif_running(netdev))
397 goto exit;
398
399 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
400 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
401 for (n = 0; n < adapter->num_tx_queues; n++) {
402 struct igb_tx_buffer *buffer_info;
403 tx_ring = adapter->tx_ring[n];
404 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
405 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
406 n, tx_ring->next_to_use, tx_ring->next_to_clean,
407 (u64)buffer_info->dma,
408 buffer_info->length,
409 buffer_info->next_to_watch,
410 (u64)buffer_info->time_stamp);
411 }
412
413 /* Print TX Rings */
414 if (!netif_msg_tx_done(adapter))
415 goto rx_ring_summary;
416
417 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
418
419 /* Transmit Descriptor Formats
420 *
421 * Advanced Transmit Descriptor
422 * +--------------------------------------------------------------+
423 * 0 | Buffer Address [63:0] |
424 * +--------------------------------------------------------------+
425 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
426 * +--------------------------------------------------------------+
427 * 63 46 45 40 39 38 36 35 32 31 24 15 0
428 */
429
430 for (n = 0; n < adapter->num_tx_queues; n++) {
431 tx_ring = adapter->tx_ring[n];
432 pr_info("------------------------------------\n");
433 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
434 pr_info("------------------------------------\n");
435 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
436 "[bi->dma ] leng ntw timestamp "
437 "bi->skb\n");
438
439 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
440 const char *next_desc;
441 struct igb_tx_buffer *buffer_info;
442 tx_desc = IGB_TX_DESC(tx_ring, i);
443 buffer_info = &tx_ring->tx_buffer_info[i];
444 u0 = (struct my_u0 *)tx_desc;
445 if (i == tx_ring->next_to_use &&
446 i == tx_ring->next_to_clean)
447 next_desc = " NTC/U";
448 else if (i == tx_ring->next_to_use)
449 next_desc = " NTU";
450 else if (i == tx_ring->next_to_clean)
451 next_desc = " NTC";
452 else
453 next_desc = "";
454
455 pr_info("T [0x%03X] %016llX %016llX %016llX"
456 " %04X %p %016llX %p%s\n", i,
457 le64_to_cpu(u0->a),
458 le64_to_cpu(u0->b),
459 (u64)buffer_info->dma,
460 buffer_info->length,
461 buffer_info->next_to_watch,
462 (u64)buffer_info->time_stamp,
463 buffer_info->skb, next_desc);
464
465 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
466 print_hex_dump(KERN_INFO, "",
467 DUMP_PREFIX_ADDRESS,
468 16, 1, phys_to_virt(buffer_info->dma),
469 buffer_info->length, true);
470 }
471 }
472
473 /* Print RX Rings Summary */
474 rx_ring_summary:
475 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
476 pr_info("Queue [NTU] [NTC]\n");
477 for (n = 0; n < adapter->num_rx_queues; n++) {
478 rx_ring = adapter->rx_ring[n];
479 pr_info(" %5d %5X %5X\n",
480 n, rx_ring->next_to_use, rx_ring->next_to_clean);
481 }
482
483 /* Print RX Rings */
484 if (!netif_msg_rx_status(adapter))
485 goto exit;
486
487 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
488
489 /* Advanced Receive Descriptor (Read) Format
490 * 63 1 0
491 * +-----------------------------------------------------+
492 * 0 | Packet Buffer Address [63:1] |A0/NSE|
493 * +----------------------------------------------+------+
494 * 8 | Header Buffer Address [63:1] | DD |
495 * +-----------------------------------------------------+
496 *
497 *
498 * Advanced Receive Descriptor (Write-Back) Format
499 *
500 * 63 48 47 32 31 30 21 20 17 16 4 3 0
501 * +------------------------------------------------------+
502 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
503 * | Checksum Ident | | | | Type | Type |
504 * +------------------------------------------------------+
505 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
506 * +------------------------------------------------------+
507 * 63 48 47 32 31 20 19 0
508 */
509
510 for (n = 0; n < adapter->num_rx_queues; n++) {
511 rx_ring = adapter->rx_ring[n];
512 pr_info("------------------------------------\n");
513 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
514 pr_info("------------------------------------\n");
515 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
516 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
517 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
518 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
519
520 for (i = 0; i < rx_ring->count; i++) {
521 const char *next_desc;
522 struct igb_rx_buffer *buffer_info;
523 buffer_info = &rx_ring->rx_buffer_info[i];
524 rx_desc = IGB_RX_DESC(rx_ring, i);
525 u0 = (struct my_u0 *)rx_desc;
526 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
527
528 if (i == rx_ring->next_to_use)
529 next_desc = " NTU";
530 else if (i == rx_ring->next_to_clean)
531 next_desc = " NTC";
532 else
533 next_desc = "";
534
535 if (staterr & E1000_RXD_STAT_DD) {
536 /* Descriptor Done */
537 pr_info("%s[0x%03X] %016llX %016llX -------"
538 "--------- %p%s\n", "RWB", i,
539 le64_to_cpu(u0->a),
540 le64_to_cpu(u0->b),
541 buffer_info->skb, next_desc);
542 } else {
543 pr_info("%s[0x%03X] %016llX %016llX %016llX"
544 " %p%s\n", "R ", i,
545 le64_to_cpu(u0->a),
546 le64_to_cpu(u0->b),
547 (u64)buffer_info->dma,
548 buffer_info->skb, next_desc);
549
550 if (netif_msg_pktdata(adapter)) {
551 print_hex_dump(KERN_INFO, "",
552 DUMP_PREFIX_ADDRESS,
553 16, 1,
554 phys_to_virt(buffer_info->dma),
555 IGB_RX_HDR_LEN, true);
556 print_hex_dump(KERN_INFO, "",
557 DUMP_PREFIX_ADDRESS,
558 16, 1,
559 phys_to_virt(
560 buffer_info->page_dma +
561 buffer_info->page_offset),
562 PAGE_SIZE/2, true);
563 }
564 }
565 }
566 }
567
568 exit:
569 return;
570 }
571
572 /**
573 * igb_get_hw_dev - return device
574 * used by hardware layer to print debugging information
575 **/
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577 {
578 struct igb_adapter *adapter = hw->back;
579 return adapter->netdev;
580 }
581
582 /**
583 * igb_init_module - Driver Registration Routine
584 *
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
587 **/
588 static int __init igb_init_module(void)
589 {
590 int ret;
591 pr_info("%s - version %s\n",
592 igb_driver_string, igb_driver_version);
593
594 pr_info("%s\n", igb_copyright);
595
596 #ifdef CONFIG_IGB_DCA
597 dca_register_notify(&dca_notifier);
598 #endif
599 ret = pci_register_driver(&igb_driver);
600 return ret;
601 }
602
603 module_init(igb_init_module);
604
605 /**
606 * igb_exit_module - Driver Exit Cleanup Routine
607 *
608 * igb_exit_module is called just before the driver is removed
609 * from memory.
610 **/
611 static void __exit igb_exit_module(void)
612 {
613 #ifdef CONFIG_IGB_DCA
614 dca_unregister_notify(&dca_notifier);
615 #endif
616 pci_unregister_driver(&igb_driver);
617 }
618
619 module_exit(igb_exit_module);
620
621 #define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
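/* Q_IDX_82576() interleaves queue indices eight apart: i = 0, 1, 2, 3 map to
 * queue register indices 0, 8, 1, 9, matching the VF pairing described in
 * igb_cache_ring_register() below.
 */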
622 /**
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
625 *
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
628 **/
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
630 {
631 int i = 0, j = 0;
632 u32 rbase_offset = adapter->vfs_allocated_count;
633
634 switch (adapter->hw.mac.type) {
635 case e1000_82576:
636 /* The queues are allocated for virtualization such that VF 0
637 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638 * In order to avoid collision we start at the first free queue
639 * and continue consuming queues in the same sequence
640 */
641 if (adapter->vfs_allocated_count) {
642 for (; i < adapter->rss_queues; i++)
643 adapter->rx_ring[i]->reg_idx = rbase_offset +
644 Q_IDX_82576(i);
645 }
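/* Fall through: remaining queues use the default linear mapping below */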
646 case e1000_82575:
647 case e1000_82580:
648 case e1000_i350:
649 case e1000_i210:
650 case e1000_i211:
651 default:
652 for (; i < adapter->num_rx_queues; i++)
653 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654 for (; j < adapter->num_tx_queues; j++)
655 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656 break;
657 }
658 }
659
660 static void igb_free_queues(struct igb_adapter *adapter)
661 {
662 int i;
663
664 for (i = 0; i < adapter->num_tx_queues; i++) {
665 kfree(adapter->tx_ring[i]);
666 adapter->tx_ring[i] = NULL;
667 }
668 for (i = 0; i < adapter->num_rx_queues; i++) {
669 kfree(adapter->rx_ring[i]);
670 adapter->rx_ring[i] = NULL;
671 }
672 adapter->num_rx_queues = 0;
673 adapter->num_tx_queues = 0;
674 }
675
676 /**
677 * igb_alloc_queues - Allocate memory for all rings
678 * @adapter: board private structure to initialize
679 *
680 * We allocate one ring per queue at run-time since we don't know the
681 * number of queues at compile-time.
682 **/
683 static int igb_alloc_queues(struct igb_adapter *adapter)
684 {
685 struct igb_ring *ring;
686 int i;
687 int orig_node = adapter->node;
688
689 for (i = 0; i < adapter->num_tx_queues; i++) {
690 if (orig_node == -1) {
691 int cur_node = next_online_node(adapter->node);
692 if (cur_node == MAX_NUMNODES)
693 cur_node = first_online_node;
694 adapter->node = cur_node;
695 }
696 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
697 adapter->node);
698 if (!ring)
699 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
700 if (!ring)
701 goto err;
702 ring->count = adapter->tx_ring_count;
703 ring->queue_index = i;
704 ring->dev = &adapter->pdev->dev;
705 ring->netdev = adapter->netdev;
706 ring->numa_node = adapter->node;
707 /* For 82575, context index must be unique per ring. */
708 if (adapter->hw.mac.type == e1000_82575)
709 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
710 adapter->tx_ring[i] = ring;
711 }
712 /* Restore the adapter's original node */
713 adapter->node = orig_node;
714
715 for (i = 0; i < adapter->num_rx_queues; i++) {
716 if (orig_node == -1) {
717 int cur_node = next_online_node(adapter->node);
718 if (cur_node == MAX_NUMNODES)
719 cur_node = first_online_node;
720 adapter->node = cur_node;
721 }
722 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
723 adapter->node);
724 if (!ring)
725 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
726 if (!ring)
727 goto err;
728 ring->count = adapter->rx_ring_count;
729 ring->queue_index = i;
730 ring->dev = &adapter->pdev->dev;
731 ring->netdev = adapter->netdev;
732 ring->numa_node = adapter->node;
733 /* set flag indicating ring supports SCTP checksum offload */
734 if (adapter->hw.mac.type >= e1000_82576)
735 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
736
737 /*
738 * On i350, i210, and i211, loopback VLAN packets
739 * have the tag byte-swapped.
740 */
741 if (adapter->hw.mac.type >= e1000_i350)
742 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
743
744 adapter->rx_ring[i] = ring;
745 }
746 /* Restore the adapter's original node */
747 adapter->node = orig_node;
748
749 igb_cache_ring_register(adapter);
750
751 return 0;
752
753 err:
754 /* Restore the adapter's original node */
755 adapter->node = orig_node;
756 igb_free_queues(adapter);
757
758 return -ENOMEM;
759 }
760
761 /**
762 * igb_write_ivar - configure ivar for given MSI-X vector
763 * @hw: pointer to the HW structure
764 * @msix_vector: vector number we are allocating to a given ring
765 * @index: row index of IVAR register to write within IVAR table
766 * @offset: column offset within IVAR, should be a multiple of 8
767 *
768 * This function is intended to handle the writing of the IVAR register
769 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
770 * each containing a cause allocation for an Rx and Tx ring, and a
771 * variable number of rows depending on the number of queues supported.
772 **/
773 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
774 int index, int offset)
775 {
776 u32 ivar = array_rd32(E1000_IVAR0, index);
777
778 /* clear any bits that are currently set */
779 ivar &= ~((u32)0xFF << offset);
780
781 /* write vector and valid bit */
782 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
783
784 array_wr32(E1000_IVAR0, index, ivar);
785 }
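/* Worked example (see igb_assign_vector() below for the index/offset math):
 * on 82576, Rx queue 10 uses index = 10 & 0x7 = 2 and offset = (10 & 0x8) << 1
 * = 16, i.e. the third byte of IVAR(2); on 82580 and newer, the same queue
 * uses index = 10 >> 1 = 5 and offset = (10 & 0x1) << 4 = 0.
 */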
786
787 #define IGB_N0_QUEUE -1
788 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
789 {
790 struct igb_adapter *adapter = q_vector->adapter;
791 struct e1000_hw *hw = &adapter->hw;
792 int rx_queue = IGB_N0_QUEUE;
793 int tx_queue = IGB_N0_QUEUE;
794 u32 msixbm = 0;
795
796 if (q_vector->rx.ring)
797 rx_queue = q_vector->rx.ring->reg_idx;
798 if (q_vector->tx.ring)
799 tx_queue = q_vector->tx.ring->reg_idx;
800
801 switch (hw->mac.type) {
802 case e1000_82575:
803 /* The 82575 assigns vectors using a bitmask, which matches the
804  * bitmask for the EICR/EIMS/EIMC registers. To assign one
805  * or more queues to a vector, we write the appropriate bits
806  * into the MSIXBM register for that vector. */
807 if (rx_queue > IGB_N0_QUEUE)
808 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
809 if (tx_queue > IGB_N0_QUEUE)
810 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
811 if (!adapter->msix_entries && msix_vector == 0)
812 msixbm |= E1000_EIMS_OTHER;
813 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
814 q_vector->eims_value = msixbm;
815 break;
816 case e1000_82576:
817 /*
818 * 82576 uses a table that essentially consists of 2 columns
819 * with 8 rows. The ordering is column-major so we use the
820 * lower 3 bits as the row index, and the 4th bit as the
821 * column offset.
822 */
823 if (rx_queue > IGB_N0_QUEUE)
824 igb_write_ivar(hw, msix_vector,
825 rx_queue & 0x7,
826 (rx_queue & 0x8) << 1);
827 if (tx_queue > IGB_N0_QUEUE)
828 igb_write_ivar(hw, msix_vector,
829 tx_queue & 0x7,
830 ((tx_queue & 0x8) << 1) + 8);
831 q_vector->eims_value = 1 << msix_vector;
832 break;
833 case e1000_82580:
834 case e1000_i350:
835 case e1000_i210:
836 case e1000_i211:
837 /*
838 * On 82580 and newer adapters the scheme is similar to 82576
839 * however instead of ordering column-major we have things
840 * ordered row-major. So we traverse the table by using
841 * bit 0 as the column offset, and the remaining bits as the
842 * row index.
843 */
844 if (rx_queue > IGB_N0_QUEUE)
845 igb_write_ivar(hw, msix_vector,
846 rx_queue >> 1,
847 (rx_queue & 0x1) << 4);
848 if (tx_queue > IGB_N0_QUEUE)
849 igb_write_ivar(hw, msix_vector,
850 tx_queue >> 1,
851 ((tx_queue & 0x1) << 4) + 8);
852 q_vector->eims_value = 1 << msix_vector;
853 break;
854 default:
855 BUG();
856 break;
857 }
858
859 /* add q_vector eims value to global eims_enable_mask */
860 adapter->eims_enable_mask |= q_vector->eims_value;
861
862 /* configure q_vector to set itr on first interrupt */
863 q_vector->set_itr = 1;
864 }
865
866 /**
867 * igb_configure_msix - Configure MSI-X hardware
868 *
869 * igb_configure_msix sets up the hardware to properly
870 * generate MSI-X interrupts.
871 **/
872 static void igb_configure_msix(struct igb_adapter *adapter)
873 {
874 u32 tmp;
875 int i, vector = 0;
876 struct e1000_hw *hw = &adapter->hw;
877
878 adapter->eims_enable_mask = 0;
879
880 /* set vector for other causes, i.e. link changes */
881 switch (hw->mac.type) {
882 case e1000_82575:
883 tmp = rd32(E1000_CTRL_EXT);
884 /* enable MSI-X PBA support */
885 tmp |= E1000_CTRL_EXT_PBA_CLR;
886
887 /* Auto-Mask interrupts upon ICR read. */
888 tmp |= E1000_CTRL_EXT_EIAME;
889 tmp |= E1000_CTRL_EXT_IRCA;
890
891 wr32(E1000_CTRL_EXT, tmp);
892
893 /* enable msix_other interrupt */
894 array_wr32(E1000_MSIXBM(0), vector++,
895 E1000_EIMS_OTHER);
896 adapter->eims_other = E1000_EIMS_OTHER;
897
898 break;
899
900 case e1000_82576:
901 case e1000_82580:
902 case e1000_i350:
903 case e1000_i210:
904 case e1000_i211:
905 /* Turn on MSI-X capability first, or our settings
906 * won't stick. And it will take days to debug. */
907 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
908 E1000_GPIE_PBA | E1000_GPIE_EIAME |
909 E1000_GPIE_NSICR);
910
911 /* enable msix_other interrupt */
912 adapter->eims_other = 1 << vector;
913 tmp = (vector++ | E1000_IVAR_VALID) << 8;
914
915 wr32(E1000_IVAR_MISC, tmp);
916 break;
917 default:
918 /* do nothing, since nothing else supports MSI-X */
919 break;
920 } /* switch (hw->mac.type) */
921
922 adapter->eims_enable_mask |= adapter->eims_other;
923
924 for (i = 0; i < adapter->num_q_vectors; i++)
925 igb_assign_vector(adapter->q_vector[i], vector++);
926
927 wrfl();
928 }
929
930 /**
931 * igb_request_msix - Initialize MSI-X interrupts
932 *
933 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
934 * kernel.
935 **/
936 static int igb_request_msix(struct igb_adapter *adapter)
937 {
938 struct net_device *netdev = adapter->netdev;
939 struct e1000_hw *hw = &adapter->hw;
940 int i, err = 0, vector = 0;
941
942 err = request_irq(adapter->msix_entries[vector].vector,
943 igb_msix_other, 0, netdev->name, adapter);
944 if (err)
945 goto out;
946 vector++;
947
948 for (i = 0; i < adapter->num_q_vectors; i++) {
949 struct igb_q_vector *q_vector = adapter->q_vector[i];
950
951 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
952
953 if (q_vector->rx.ring && q_vector->tx.ring)
954 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
955 q_vector->rx.ring->queue_index);
956 else if (q_vector->tx.ring)
957 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
958 q_vector->tx.ring->queue_index);
959 else if (q_vector->rx.ring)
960 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
961 q_vector->rx.ring->queue_index);
962 else
963 sprintf(q_vector->name, "%s-unused", netdev->name);
964
965 err = request_irq(adapter->msix_entries[vector].vector,
966 igb_msix_ring, 0, q_vector->name,
967 q_vector);
968 if (err)
969 goto out;
970 vector++;
971 }
972
973 igb_configure_msix(adapter);
974 return 0;
975 out:
976 return err;
977 }
978
979 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
980 {
981 if (adapter->msix_entries) {
982 pci_disable_msix(adapter->pdev);
983 kfree(adapter->msix_entries);
984 adapter->msix_entries = NULL;
985 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
986 pci_disable_msi(adapter->pdev);
987 }
988 }
989
990 /**
991 * igb_free_q_vectors - Free memory allocated for interrupt vectors
992 * @adapter: board private structure to initialize
993 *
994 * This function frees the memory allocated to the q_vectors. In addition if
995 * NAPI is enabled it will delete any references to the NAPI struct prior
996 * to freeing the q_vector.
997 **/
998 static void igb_free_q_vectors(struct igb_adapter *adapter)
999 {
1000 int v_idx;
1001
1002 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1003 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1004 adapter->q_vector[v_idx] = NULL;
1005 if (!q_vector)
1006 continue;
1007 netif_napi_del(&q_vector->napi);
1008 kfree(q_vector);
1009 }
1010 adapter->num_q_vectors = 0;
1011 }
1012
1013 /**
1014 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1015 *
1016 * This function resets the device so that it has 0 rx queues, tx queues, and
1017 * MSI-X interrupts allocated.
1018 */
1019 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1020 {
1021 igb_free_queues(adapter);
1022 igb_free_q_vectors(adapter);
1023 igb_reset_interrupt_capability(adapter);
1024 }
1025
1026 /**
1027 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1028 *
1029 * Attempt to configure interrupts using the best available
1030 * capabilities of the hardware and kernel.
1031 **/
1032 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1033 {
1034 int err;
1035 int numvecs, i;
1036
1037 /* Number of supported queues. */
1038 adapter->num_rx_queues = adapter->rss_queues;
1039 if (adapter->vfs_allocated_count)
1040 adapter->num_tx_queues = 1;
1041 else
1042 adapter->num_tx_queues = adapter->rss_queues;
1043
1044 /* start with one vector for every rx queue */
1045 numvecs = adapter->num_rx_queues;
1046
1047 /* if tx handler is separate add 1 for every tx queue */
1048 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1049 numvecs += adapter->num_tx_queues;
1050
1051 /* store the number of vectors reserved for queues */
1052 adapter->num_q_vectors = numvecs;
1053
1054 /* add 1 vector for link status interrupts */
1055 numvecs++;
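/* For example, with four RSS queues and queue pairing enabled this gives
 * four queue vectors plus this link-status vector, i.e. five MSI-X entries.
 */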
1056 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1057 GFP_KERNEL);
1058
1059 if (!adapter->msix_entries)
1060 goto msi_only;
1061
1062 for (i = 0; i < numvecs; i++)
1063 adapter->msix_entries[i].entry = i;
1064
1065 err = pci_enable_msix(adapter->pdev,
1066 adapter->msix_entries,
1067 numvecs);
1068 if (err == 0)
1069 goto out;
1070
1071 igb_reset_interrupt_capability(adapter);
1072
1073 /* If we can't do MSI-X, try MSI */
1074 msi_only:
1075 #ifdef CONFIG_PCI_IOV
1076 /* disable SR-IOV for non MSI-X configurations */
1077 if (adapter->vf_data) {
1078 struct e1000_hw *hw = &adapter->hw;
1079 /* disable iov and allow time for transactions to clear */
1080 pci_disable_sriov(adapter->pdev);
1081 msleep(500);
1082
1083 kfree(adapter->vf_data);
1084 adapter->vf_data = NULL;
1085 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1086 wrfl();
1087 msleep(100);
1088 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1089 }
1090 #endif
1091 adapter->vfs_allocated_count = 0;
1092 adapter->rss_queues = 1;
1093 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1094 adapter->num_rx_queues = 1;
1095 adapter->num_tx_queues = 1;
1096 adapter->num_q_vectors = 1;
1097 if (!pci_enable_msi(adapter->pdev))
1098 adapter->flags |= IGB_FLAG_HAS_MSI;
1099 out:
1100 /* Notify the stack of the (possibly) reduced queue counts. */
1101 rtnl_lock();
1102 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1103 err = netif_set_real_num_rx_queues(adapter->netdev,
1104 adapter->num_rx_queues);
1105 rtnl_unlock();
1106 return err;
1107 }
1108
1109 /**
1110 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1111 * @adapter: board private structure to initialize
1112 *
1113 * We allocate one q_vector per queue interrupt. If allocation fails we
1114 * return -ENOMEM.
1115 **/
1116 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1117 {
1118 struct igb_q_vector *q_vector;
1119 struct e1000_hw *hw = &adapter->hw;
1120 int v_idx;
1121 int orig_node = adapter->node;
1122
1123 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1124 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1125 adapter->num_tx_queues)) &&
1126 (adapter->num_rx_queues == v_idx))
1127 adapter->node = orig_node;
1128 if (orig_node == -1) {
1129 int cur_node = next_online_node(adapter->node);
1130 if (cur_node == MAX_NUMNODES)
1131 cur_node = first_online_node;
1132 adapter->node = cur_node;
1133 }
1134 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1135 adapter->node);
1136 if (!q_vector)
1137 q_vector = kzalloc(sizeof(struct igb_q_vector),
1138 GFP_KERNEL);
1139 if (!q_vector)
1140 goto err_out;
1141 q_vector->adapter = adapter;
1142 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1143 q_vector->itr_val = IGB_START_ITR;
1144 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1145 adapter->q_vector[v_idx] = q_vector;
1146 }
1147 /* Restore the adapter's original node */
1148 adapter->node = orig_node;
1149
1150 return 0;
1151
1152 err_out:
1153 /* Restore the adapter's original node */
1154 adapter->node = orig_node;
1155 igb_free_q_vectors(adapter);
1156 return -ENOMEM;
1157 }
1158
1159 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1160 int ring_idx, int v_idx)
1161 {
1162 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1163
1164 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1165 q_vector->rx.ring->q_vector = q_vector;
1166 q_vector->rx.count++;
1167 q_vector->itr_val = adapter->rx_itr_setting;
1168 if (q_vector->itr_val && q_vector->itr_val <= 3)
1169 q_vector->itr_val = IGB_START_ITR;
1170 }
1171
1172 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1173 int ring_idx, int v_idx)
1174 {
1175 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1176
1177 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1178 q_vector->tx.ring->q_vector = q_vector;
1179 q_vector->tx.count++;
1180 q_vector->itr_val = adapter->tx_itr_setting;
1181 q_vector->tx.work_limit = adapter->tx_work_limit;
1182 if (q_vector->itr_val && q_vector->itr_val <= 3)
1183 q_vector->itr_val = IGB_START_ITR;
1184 }
1185
1186 /**
1187 * igb_map_ring_to_vector - maps allocated queues to vectors
1188 *
1189 * This function maps the recently allocated queues to vectors.
1190 **/
1191 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1192 {
1193 int i;
1194 int v_idx = 0;
1195
1196 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1197 (adapter->num_q_vectors < adapter->num_tx_queues))
1198 return -ENOMEM;
1199
1200 if (adapter->num_q_vectors >=
1201 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1202 for (i = 0; i < adapter->num_rx_queues; i++)
1203 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1204 for (i = 0; i < adapter->num_tx_queues; i++)
1205 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1206 } else {
1207 for (i = 0; i < adapter->num_rx_queues; i++) {
1208 if (i < adapter->num_tx_queues)
1209 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1210 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1211 }
1212 for (; i < adapter->num_tx_queues; i++)
1213 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1214 }
1215 return 0;
1216 }
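/* With paired queues (e.g. four Rx and four Tx rings on four q_vectors),
 * each vector above ends up with one Rx and one Tx ring; when there are
 * enough vectors for every ring, each ring gets a dedicated vector instead.
 */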
1217
1218 /**
1219 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1220 *
1221 * This function initializes the interrupts and allocates all of the queues.
1222 **/
1223 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1224 {
1225 struct pci_dev *pdev = adapter->pdev;
1226 int err;
1227
1228 err = igb_set_interrupt_capability(adapter);
1229 if (err)
1230 return err;
1231
1232 err = igb_alloc_q_vectors(adapter);
1233 if (err) {
1234 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1235 goto err_alloc_q_vectors;
1236 }
1237
1238 err = igb_alloc_queues(adapter);
1239 if (err) {
1240 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1241 goto err_alloc_queues;
1242 }
1243
1244 err = igb_map_ring_to_vector(adapter);
1245 if (err) {
1246 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1247 goto err_map_queues;
1248 }
1249
1250
1251 return 0;
1252 err_map_queues:
1253 igb_free_queues(adapter);
1254 err_alloc_queues:
1255 igb_free_q_vectors(adapter);
1256 err_alloc_q_vectors:
1257 igb_reset_interrupt_capability(adapter);
1258 return err;
1259 }
1260
1261 /**
1262 * igb_request_irq - initialize interrupts
1263 *
1264 * Attempts to configure interrupts using the best available
1265 * capabilities of the hardware and kernel.
1266 **/
1267 static int igb_request_irq(struct igb_adapter *adapter)
1268 {
1269 struct net_device *netdev = adapter->netdev;
1270 struct pci_dev *pdev = adapter->pdev;
1271 int err = 0;
1272
1273 if (adapter->msix_entries) {
1274 err = igb_request_msix(adapter);
1275 if (!err)
1276 goto request_done;
1277 /* fall back to MSI */
1278 igb_clear_interrupt_scheme(adapter);
1279 if (!pci_enable_msi(pdev))
1280 adapter->flags |= IGB_FLAG_HAS_MSI;
1281 igb_free_all_tx_resources(adapter);
1282 igb_free_all_rx_resources(adapter);
1283 adapter->num_tx_queues = 1;
1284 adapter->num_rx_queues = 1;
1285 adapter->num_q_vectors = 1;
1286 err = igb_alloc_q_vectors(adapter);
1287 if (err) {
1288 dev_err(&pdev->dev,
1289 "Unable to allocate memory for vectors\n");
1290 goto request_done;
1291 }
1292 err = igb_alloc_queues(adapter);
1293 if (err) {
1294 dev_err(&pdev->dev,
1295 "Unable to allocate memory for queues\n");
1296 igb_free_q_vectors(adapter);
1297 goto request_done;
1298 }
1299 igb_setup_all_tx_resources(adapter);
1300 igb_setup_all_rx_resources(adapter);
1301 }
1302
1303 igb_assign_vector(adapter->q_vector[0], 0);
1304
1305 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1306 err = request_irq(pdev->irq, igb_intr_msi, 0,
1307 netdev->name, adapter);
1308 if (!err)
1309 goto request_done;
1310
1311 /* fall back to legacy interrupts */
1312 igb_reset_interrupt_capability(adapter);
1313 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1314 }
1315
1316 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1317 netdev->name, adapter);
1318
1319 if (err)
1320 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1321 err);
1322
1323 request_done:
1324 return err;
1325 }
1326
1327 static void igb_free_irq(struct igb_adapter *adapter)
1328 {
1329 if (adapter->msix_entries) {
1330 int vector = 0, i;
1331
1332 free_irq(adapter->msix_entries[vector++].vector, adapter);
1333
1334 for (i = 0; i < adapter->num_q_vectors; i++)
1335 free_irq(adapter->msix_entries[vector++].vector,
1336 adapter->q_vector[i]);
1337 } else {
1338 free_irq(adapter->pdev->irq, adapter);
1339 }
1340 }
1341
1342 /**
1343 * igb_irq_disable - Mask off interrupt generation on the NIC
1344 * @adapter: board private structure
1345 **/
1346 static void igb_irq_disable(struct igb_adapter *adapter)
1347 {
1348 struct e1000_hw *hw = &adapter->hw;
1349
1350 /*
1351 * we need to be careful when disabling interrupts. The VFs are also
1352 * mapped into these registers and so clearing the bits can cause
1353 * issues for the VF drivers, so we only clear the bits we set
1354 */
1355 if (adapter->msix_entries) {
1356 u32 regval = rd32(E1000_EIAM);
1357 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1358 wr32(E1000_EIMC, adapter->eims_enable_mask);
1359 regval = rd32(E1000_EIAC);
1360 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1361 }
1362
1363 wr32(E1000_IAM, 0);
1364 wr32(E1000_IMC, ~0);
1365 wrfl();
1366 if (adapter->msix_entries) {
1367 int i;
1368 for (i = 0; i < adapter->num_q_vectors; i++)
1369 synchronize_irq(adapter->msix_entries[i].vector);
1370 } else {
1371 synchronize_irq(adapter->pdev->irq);
1372 }
1373 }
1374
1375 /**
1376 * igb_irq_enable - Enable default interrupt generation settings
1377 * @adapter: board private structure
1378 **/
1379 static void igb_irq_enable(struct igb_adapter *adapter)
1380 {
1381 struct e1000_hw *hw = &adapter->hw;
1382
1383 if (adapter->msix_entries) {
1384 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1385 u32 regval = rd32(E1000_EIAC);
1386 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1387 regval = rd32(E1000_EIAM);
1388 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1389 wr32(E1000_EIMS, adapter->eims_enable_mask);
1390 if (adapter->vfs_allocated_count) {
1391 wr32(E1000_MBVFIMR, 0xFF);
1392 ims |= E1000_IMS_VMMB;
1393 }
1394 wr32(E1000_IMS, ims);
1395 } else {
1396 wr32(E1000_IMS, IMS_ENABLE_MASK |
1397 E1000_IMS_DRSTA);
1398 wr32(E1000_IAM, IMS_ENABLE_MASK |
1399 E1000_IMS_DRSTA);
1400 }
1401 }
1402
1403 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1404 {
1405 struct e1000_hw *hw = &adapter->hw;
1406 u16 vid = adapter->hw.mng_cookie.vlan_id;
1407 u16 old_vid = adapter->mng_vlan_id;
1408
1409 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1410 /* add VID to filter table */
1411 igb_vfta_set(hw, vid, true);
1412 adapter->mng_vlan_id = vid;
1413 } else {
1414 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1415 }
1416
1417 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1418 (vid != old_vid) &&
1419 !test_bit(old_vid, adapter->active_vlans)) {
1420 /* remove VID from filter table */
1421 igb_vfta_set(hw, old_vid, false);
1422 }
1423 }
1424
1425 /**
1426 * igb_release_hw_control - release control of the h/w to f/w
1427 * @adapter: address of board private structure
1428 *
1429 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1430 * For ASF and Pass Through versions of f/w this means that the
1431 * driver is no longer loaded.
1432 *
1433 **/
1434 static void igb_release_hw_control(struct igb_adapter *adapter)
1435 {
1436 struct e1000_hw *hw = &adapter->hw;
1437 u32 ctrl_ext;
1438
1439 /* Let firmware take over control of h/w */
1440 ctrl_ext = rd32(E1000_CTRL_EXT);
1441 wr32(E1000_CTRL_EXT,
1442 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1443 }
1444
1445 /**
1446 * igb_get_hw_control - get control of the h/w from f/w
1447 * @adapter: address of board private structure
1448 *
1449 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1450 * For ASF and Pass Through versions of f/w this means that
1451 * the driver is loaded.
1452 *
1453 **/
1454 static void igb_get_hw_control(struct igb_adapter *adapter)
1455 {
1456 struct e1000_hw *hw = &adapter->hw;
1457 u32 ctrl_ext;
1458
1459 /* Let firmware know the driver has taken over */
1460 ctrl_ext = rd32(E1000_CTRL_EXT);
1461 wr32(E1000_CTRL_EXT,
1462 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1463 }
1464
1465 /**
1466 * igb_configure - configure the hardware for RX and TX
1467 * @adapter: private board structure
1468 **/
1469 static void igb_configure(struct igb_adapter *adapter)
1470 {
1471 struct net_device *netdev = adapter->netdev;
1472 int i;
1473
1474 igb_get_hw_control(adapter);
1475 igb_set_rx_mode(netdev);
1476
1477 igb_restore_vlan(adapter);
1478
1479 igb_setup_tctl(adapter);
1480 igb_setup_mrqc(adapter);
1481 igb_setup_rctl(adapter);
1482
1483 igb_configure_tx(adapter);
1484 igb_configure_rx(adapter);
1485
1486 igb_rx_fifo_flush_82575(&adapter->hw);
1487
1488 /* call igb_desc_unused which always leaves
1489 * at least 1 descriptor unused to make sure
1490 * next_to_use != next_to_clean */
1491 for (i = 0; i < adapter->num_rx_queues; i++) {
1492 struct igb_ring *ring = adapter->rx_ring[i];
1493 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1494 }
1495 }
1496
1497 /**
1498 * igb_power_up_link - Power up the phy/serdes link
1499 * @adapter: address of board private structure
1500 **/
1501 void igb_power_up_link(struct igb_adapter *adapter)
1502 {
1503 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1504 igb_power_up_phy_copper(&adapter->hw);
1505 else
1506 igb_power_up_serdes_link_82575(&adapter->hw);
1507 igb_reset_phy(&adapter->hw);
1508 }
1509
1510 /**
1511 * igb_power_down_link - Power down the phy/serdes link
1512 * @adapter: address of board private structure
1513 */
1514 static void igb_power_down_link(struct igb_adapter *adapter)
1515 {
1516 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1517 igb_power_down_phy_copper_82575(&adapter->hw);
1518 else
1519 igb_shutdown_serdes_link_82575(&adapter->hw);
1520 }
1521
1522 /**
1523 * igb_up - Open the interface and prepare it to handle traffic
1524 * @adapter: board private structure
1525 **/
1526 int igb_up(struct igb_adapter *adapter)
1527 {
1528 struct e1000_hw *hw = &adapter->hw;
1529 int i;
1530
1531 /* hardware has been reset, we need to reload some things */
1532 igb_configure(adapter);
1533
1534 clear_bit(__IGB_DOWN, &adapter->state);
1535
1536 for (i = 0; i < adapter->num_q_vectors; i++)
1537 napi_enable(&(adapter->q_vector[i]->napi));
1538
1539 if (adapter->msix_entries)
1540 igb_configure_msix(adapter);
1541 else
1542 igb_assign_vector(adapter->q_vector[0], 0);
1543
1544 /* Clear any pending interrupts. */
1545 rd32(E1000_ICR);
1546 igb_irq_enable(adapter);
1547
1548 /* notify VFs that reset has been completed */
1549 if (adapter->vfs_allocated_count) {
1550 u32 reg_data = rd32(E1000_CTRL_EXT);
1551 reg_data |= E1000_CTRL_EXT_PFRSTD;
1552 wr32(E1000_CTRL_EXT, reg_data);
1553 }
1554
1555 netif_tx_start_all_queues(adapter->netdev);
1556
1557 /* start the watchdog. */
1558 hw->mac.get_link_status = 1;
1559 schedule_work(&adapter->watchdog_task);
1560
1561 return 0;
1562 }
1563
1564 void igb_down(struct igb_adapter *adapter)
1565 {
1566 struct net_device *netdev = adapter->netdev;
1567 struct e1000_hw *hw = &adapter->hw;
1568 u32 tctl, rctl;
1569 int i;
1570
1571 /* signal that we're down so the interrupt handler does not
1572 * reschedule our watchdog timer */
1573 set_bit(__IGB_DOWN, &adapter->state);
1574
1575 /* disable receives in the hardware */
1576 rctl = rd32(E1000_RCTL);
1577 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1578 /* flush and sleep below */
1579
1580 netif_tx_stop_all_queues(netdev);
1581
1582 /* disable transmits in the hardware */
1583 tctl = rd32(E1000_TCTL);
1584 tctl &= ~E1000_TCTL_EN;
1585 wr32(E1000_TCTL, tctl);
1586 /* flush both disables and wait for them to finish */
1587 wrfl();
1588 msleep(10);
1589
1590 for (i = 0; i < adapter->num_q_vectors; i++)
1591 napi_disable(&(adapter->q_vector[i]->napi));
1592
1593 igb_irq_disable(adapter);
1594
1595 del_timer_sync(&adapter->watchdog_timer);
1596 del_timer_sync(&adapter->phy_info_timer);
1597
1598 netif_carrier_off(netdev);
1599
1600 /* record the stats before reset */
1601 spin_lock(&adapter->stats64_lock);
1602 igb_update_stats(adapter, &adapter->stats64);
1603 spin_unlock(&adapter->stats64_lock);
1604
1605 adapter->link_speed = 0;
1606 adapter->link_duplex = 0;
1607
1608 if (!pci_channel_offline(adapter->pdev))
1609 igb_reset(adapter);
1610 igb_clean_all_tx_rings(adapter);
1611 igb_clean_all_rx_rings(adapter);
1612 #ifdef CONFIG_IGB_DCA
1613
1614 /* since we reset the hardware DCA settings were cleared */
1615 igb_setup_dca(adapter);
1616 #endif
1617 }
1618
1619 void igb_reinit_locked(struct igb_adapter *adapter)
1620 {
1621 WARN_ON(in_interrupt());
1622 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1623 msleep(1);
1624 igb_down(adapter);
1625 igb_up(adapter);
1626 clear_bit(__IGB_RESETTING, &adapter->state);
1627 }
1628
1629 void igb_reset(struct igb_adapter *adapter)
1630 {
1631 struct pci_dev *pdev = adapter->pdev;
1632 struct e1000_hw *hw = &adapter->hw;
1633 struct e1000_mac_info *mac = &hw->mac;
1634 struct e1000_fc_info *fc = &hw->fc;
1635 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1636 u16 hwm;
1637
1638 /* Repartition Pba for greater than 9k mtu
1639 * To take effect CTRL.RST is required.
1640 */
1641 switch (mac->type) {
1642 case e1000_i350:
1643 case e1000_82580:
1644 pba = rd32(E1000_RXPBS);
1645 pba = igb_rxpbs_adjust_82580(pba);
1646 break;
1647 case e1000_82576:
1648 pba = rd32(E1000_RXPBS);
1649 pba &= E1000_RXPBS_SIZE_MASK_82576;
1650 break;
1651 case e1000_82575:
1652 case e1000_i210:
1653 case e1000_i211:
1654 default:
1655 pba = E1000_PBA_34K;
1656 break;
1657 }
1658
1659 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1660 (mac->type < e1000_82576)) {
1661 /* adjust PBA for jumbo frames */
1662 wr32(E1000_PBA, pba);
1663
1664 /* To maintain wire speed transmits, the Tx FIFO should be
1665 * large enough to accommodate two full transmit packets,
1666 * rounded up to the next 1KB and expressed in KB. Likewise,
1667 * the Rx FIFO should be large enough to accommodate at least
1668 * one full receive packet and is similarly rounded up and
1669 * expressed in KB. */
1670 pba = rd32(E1000_PBA);
1671 /* upper 16 bits has Tx packet buffer allocation size in KB */
1672 tx_space = pba >> 16;
1673 /* lower 16 bits has Rx packet buffer allocation size in KB */
1674 pba &= 0xffff;
1675 /* the Tx FIFO also stores 16 bytes of information about the Tx packet,
1676 * but don't include the Ethernet FCS because hardware appends it */
1677 min_tx_space = (adapter->max_frame_size +
1678 sizeof(union e1000_adv_tx_desc) -
1679 ETH_FCS_LEN) * 2;
1680 min_tx_space = ALIGN(min_tx_space, 1024);
1681 min_tx_space >>= 10;
1682 /* software strips receive CRC, so leave room for it */
1683 min_rx_space = adapter->max_frame_size;
1684 min_rx_space = ALIGN(min_rx_space, 1024);
1685 min_rx_space >>= 10;
1686
1687 /* If current Tx allocation is less than the min Tx FIFO size,
1688 * and the min Tx FIFO size is less than the current Rx FIFO
1689 * allocation, take space away from current Rx allocation */
1690 if (tx_space < min_tx_space &&
1691 ((min_tx_space - tx_space) < pba)) {
1692 pba = pba - (min_tx_space - tx_space);
1693
1694 /* if short on rx space, rx wins and must trump tx
1695 * adjustment */
1696 if (pba < min_rx_space)
1697 pba = min_rx_space;
1698 }
1699 wr32(E1000_PBA, pba);
1700 }
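/* Example (82575 with jumbo frames, assuming a 9018-byte max frame):
 * min_tx_space = (9018 + 16 - 4) * 2 rounded up to 1 KB = 18 KB and
 * min_rx_space = 9 KB, so the Rx buffer may shrink by up to the Tx
 * shortfall but never below 9 KB.
 */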
1701
1702 /* flow control settings */
1703 /* The high water mark must be low enough to fit one full frame
1704 * (or the size used for early receive) above it in the Rx FIFO.
1705 * Set it to the lower of:
1706 * - 90% of the Rx FIFO size, or
1707 * - the full Rx FIFO size minus two full frames (as computed below) */
1708 hwm = min(((pba << 10) * 9 / 10),
1709 ((pba << 10) - 2 * adapter->max_frame_size));
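/* E.g. assuming the default 34 KB PBA and a standard ~1.5 KB frame,
 * the 90% term wins: hwm = 34816 * 9 / 10 = 31334 bytes, which the
 * line below rounds down to 31328 for 16-byte granularity.
 */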
1710
1711 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1712 fc->low_water = fc->high_water - 16;
1713 fc->pause_time = 0xFFFF;
1714 fc->send_xon = 1;
1715 fc->current_mode = fc->requested_mode;
1716
1717 /* disable receive for all VFs and wait one second */
1718 if (adapter->vfs_allocated_count) {
1719 int i;
1720 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1721 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1722
1723 /* ping all the active vfs to let them know we are going down */
1724 igb_ping_all_vfs(adapter);
1725
1726 /* disable transmits and receives */
1727 wr32(E1000_VFRE, 0);
1728 wr32(E1000_VFTE, 0);
1729 }
1730
1731 /* Allow time for pending master requests to run */
1732 hw->mac.ops.reset_hw(hw);
1733 wr32(E1000_WUC, 0);
1734
1735 if (hw->mac.ops.init_hw(hw))
1736 dev_err(&pdev->dev, "Hardware Error\n");
1737
1738 /*
1739 * Flow control settings reset on hardware reset, so guarantee flow
1740 * control is off when forcing speed.
1741 */
1742 if (!hw->mac.autoneg)
1743 igb_force_mac_fc(hw);
1744
1745 igb_init_dmac(adapter, pba);
1746 if (!netif_running(adapter->netdev))
1747 igb_power_down_link(adapter);
1748
1749 igb_update_mng_vlan(adapter);
1750
1751 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1752 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1753
1754 igb_get_phy_info(hw);
1755 }
1756
1757 static netdev_features_t igb_fix_features(struct net_device *netdev,
1758 netdev_features_t features)
1759 {
1760 /*
1761 * Since there is no support for separate rx/tx vlan accel
1762 * enable/disable, make sure the tx flag is always in the same state as rx.
1763 */
1764 if (features & NETIF_F_HW_VLAN_RX)
1765 features |= NETIF_F_HW_VLAN_TX;
1766 else
1767 features &= ~NETIF_F_HW_VLAN_TX;
1768
1769 return features;
1770 }
1771
1772 static int igb_set_features(struct net_device *netdev,
1773 netdev_features_t features)
1774 {
1775 netdev_features_t changed = netdev->features ^ features;
1776 struct igb_adapter *adapter = netdev_priv(netdev);
1777
1778 if (changed & NETIF_F_HW_VLAN_RX)
1779 igb_vlan_mode(netdev, features);
1780
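/* only a change in the RXALL flag requires the reinit/reset below */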
1781 if (!(changed & NETIF_F_RXALL))
1782 return 0;
1783
1784 netdev->features = features;
1785
1786 if (netif_running(netdev))
1787 igb_reinit_locked(adapter);
1788 else
1789 igb_reset(adapter);
1790
1791 return 0;
1792 }
1793
1794 static const struct net_device_ops igb_netdev_ops = {
1795 .ndo_open = igb_open,
1796 .ndo_stop = igb_close,
1797 .ndo_start_xmit = igb_xmit_frame,
1798 .ndo_get_stats64 = igb_get_stats64,
1799 .ndo_set_rx_mode = igb_set_rx_mode,
1800 .ndo_set_mac_address = igb_set_mac,
1801 .ndo_change_mtu = igb_change_mtu,
1802 .ndo_do_ioctl = igb_ioctl,
1803 .ndo_tx_timeout = igb_tx_timeout,
1804 .ndo_validate_addr = eth_validate_addr,
1805 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1806 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1807 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1808 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1809 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1810 .ndo_get_vf_config = igb_ndo_get_vf_config,
1811 #ifdef CONFIG_NET_POLL_CONTROLLER
1812 .ndo_poll_controller = igb_netpoll,
1813 #endif
1814 .ndo_fix_features = igb_fix_features,
1815 .ndo_set_features = igb_set_features,
1816 };
1817
1818 /**
1819 * igb_probe - Device Initialization Routine
1820 * @pdev: PCI device information struct
1821 * @ent: entry in igb_pci_tbl
1822 *
1823 * Returns 0 on success, negative on failure
1824 *
1825 * igb_probe initializes an adapter identified by a pci_dev structure.
1826 * The OS initialization, configuring of the adapter private structure,
1827 * and a hardware reset occur.
1828 **/
1829 static int __devinit igb_probe(struct pci_dev *pdev,
1830 const struct pci_device_id *ent)
1831 {
1832 struct net_device *netdev;
1833 struct igb_adapter *adapter;
1834 struct e1000_hw *hw;
1835 u16 eeprom_data = 0;
1836 s32 ret_val;
1837 static int global_quad_port_a; /* global quad port a indication */
1838 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1839 unsigned long mmio_start, mmio_len;
1840 int err, pci_using_dac;
1841 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1842 u8 part_str[E1000_PBANUM_LENGTH];
1843
1844 /* Catch broken hardware that put the wrong VF device ID in
1845 * the PCIe SR-IOV capability.
1846 */
1847 if (pdev->is_virtfn) {
1848 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1849 pci_name(pdev), pdev->vendor, pdev->device);
1850 return -EINVAL;
1851 }
1852
1853 err = pci_enable_device_mem(pdev);
1854 if (err)
1855 return err;
1856
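/* prefer 64-bit DMA addressing; fall back to a 32-bit mask if the
 * platform cannot provide it */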
1857 pci_using_dac = 0;
1858 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1859 if (!err) {
1860 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1861 if (!err)
1862 pci_using_dac = 1;
1863 } else {
1864 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1865 if (err) {
1866 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1867 if (err) {
1868 dev_err(&pdev->dev, "No usable DMA "
1869 "configuration, aborting\n");
1870 goto err_dma;
1871 }
1872 }
1873 }
1874
1875 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1876 IORESOURCE_MEM),
1877 igb_driver_name);
1878 if (err)
1879 goto err_pci_reg;
1880
1881 pci_enable_pcie_error_reporting(pdev);
1882
1883 pci_set_master(pdev);
1884 pci_save_state(pdev);
1885
1886 err = -ENOMEM;
1887 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1888 IGB_MAX_TX_QUEUES);
1889 if (!netdev)
1890 goto err_alloc_etherdev;
1891
1892 SET_NETDEV_DEV(netdev, &pdev->dev);
1893
1894 pci_set_drvdata(pdev, netdev);
1895 adapter = netdev_priv(netdev);
1896 adapter->netdev = netdev;
1897 adapter->pdev = pdev;
1898 hw = &adapter->hw;
1899 hw->back = adapter;
1900 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1901
1902 mmio_start = pci_resource_start(pdev, 0);
1903 mmio_len = pci_resource_len(pdev, 0);
1904
1905 err = -EIO;
1906 hw->hw_addr = ioremap(mmio_start, mmio_len);
1907 if (!hw->hw_addr)
1908 goto err_ioremap;
1909
1910 netdev->netdev_ops = &igb_netdev_ops;
1911 igb_set_ethtool_ops(netdev);
1912 netdev->watchdog_timeo = 5 * HZ;
1913
1914 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1915
1916 netdev->mem_start = mmio_start;
1917 netdev->mem_end = mmio_start + mmio_len;
1918
1919 /* PCI config space info */
1920 hw->vendor_id = pdev->vendor;
1921 hw->device_id = pdev->device;
1922 hw->revision_id = pdev->revision;
1923 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1924 hw->subsystem_device_id = pdev->subsystem_device;
1925
1926 /* Copy the default MAC, PHY and NVM function pointers */
1927 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1928 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1929 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1930 /* Initialize skew-specific constants */
1931 err = ei->get_invariants(hw);
1932 if (err)
1933 goto err_sw_init;
1934
1935 /* setup the private structure */
1936 err = igb_sw_init(adapter);
1937 if (err)
1938 goto err_sw_init;
1939
1940 igb_get_bus_info_pcie(hw);
1941
1942 hw->phy.autoneg_wait_to_complete = false;
1943
1944 /* Copper options */
1945 if (hw->phy.media_type == e1000_media_type_copper) {
1946 hw->phy.mdix = AUTO_ALL_MODES;
1947 hw->phy.disable_polarity_correction = false;
1948 hw->phy.ms_type = e1000_ms_hw_default;
1949 }
1950
1951 if (igb_check_reset_block(hw))
1952 dev_info(&pdev->dev,
1953 "PHY reset is blocked due to SOL/IDER session.\n");
1954
1955 /*
1956 * features is initialized to 0 in allocation; it might already have bits
1957 * set by igb_sw_init, so we should use an OR instead of an
1958 * assignment.
1959 */
1960 netdev->features |= NETIF_F_SG |
1961 NETIF_F_IP_CSUM |
1962 NETIF_F_IPV6_CSUM |
1963 NETIF_F_TSO |
1964 NETIF_F_TSO6 |
1965 NETIF_F_RXHASH |
1966 NETIF_F_RXCSUM |
1967 NETIF_F_HW_VLAN_RX |
1968 NETIF_F_HW_VLAN_TX;
1969
1970 /* copy netdev features into list of user selectable features */
1971 netdev->hw_features |= netdev->features;
1972 netdev->hw_features |= NETIF_F_RXALL;
1973
1974 /* set this bit last since it cannot be part of hw_features */
1975 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1976
1977 netdev->vlan_features |= NETIF_F_TSO |
1978 NETIF_F_TSO6 |
1979 NETIF_F_IP_CSUM |
1980 NETIF_F_IPV6_CSUM |
1981 NETIF_F_SG;
1982
1983 netdev->priv_flags |= IFF_SUPP_NOFCS;
1984
1985 if (pci_using_dac) {
1986 netdev->features |= NETIF_F_HIGHDMA;
1987 netdev->vlan_features |= NETIF_F_HIGHDMA;
1988 }
1989
1990 if (hw->mac.type >= e1000_82576) {
1991 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1992 netdev->features |= NETIF_F_SCTP_CSUM;
1993 }
1994
1995 netdev->priv_flags |= IFF_UNICAST_FLT;
1996
1997 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1998
1999 /* before reading the NVM, reset the controller to put the device in a
2000 * known good starting state */
2001 hw->mac.ops.reset_hw(hw);
2002
2003 /*
2004 * make sure the NVM is good; i211 parts have special NVM that
2005 * doesn't contain a checksum
2006 */
2007 if (hw->mac.type != e1000_i211) {
2008 if (hw->nvm.ops.validate(hw) < 0) {
2009 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2010 err = -EIO;
2011 goto err_eeprom;
2012 }
2013 }
2014
2015 /* copy the MAC address out of the NVM */
2016 if (hw->mac.ops.read_mac_addr(hw))
2017 dev_err(&pdev->dev, "NVM Read Error\n");
2018
2019 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2020 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2021
2022 if (!is_valid_ether_addr(netdev->perm_addr)) {
2023 dev_err(&pdev->dev, "Invalid MAC Address\n");
2024 err = -EIO;
2025 goto err_eeprom;
2026 }
2027
2028 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2029 (unsigned long) adapter);
2030 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2031 (unsigned long) adapter);
2032
2033 INIT_WORK(&adapter->reset_task, igb_reset_task);
2034 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2035
2036 /* Initialize link properties that are user-changeable */
2037 adapter->fc_autoneg = true;
2038 hw->mac.autoneg = true;
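/* 0x2f advertises 10/100 half/full duplex and 1000 Mbps full duplex */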
2039 hw->phy.autoneg_advertised = 0x2f;
2040
2041 hw->fc.requested_mode = e1000_fc_default;
2042 hw->fc.current_mode = e1000_fc_default;
2043
2044 igb_validate_mdi_setting(hw);
2045
2046 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
2047 * enable the ACPI Magic Packet filter
2048 */
2049
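/* the INIT_CONTROL3 word is per port; 82580 and later parts locate it
 * with a per-function offset into the NVM */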
2050 if (hw->bus.func == 0)
2051 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2052 else if (hw->mac.type >= e1000_82580)
2053 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2054 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2055 &eeprom_data);
2056 else if (hw->bus.func == 1)
2057 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2058
2059 if (eeprom_data & eeprom_apme_mask)
2060 adapter->eeprom_wol |= E1000_WUFC_MAG;
2061
2062 /* now that we have the eeprom settings, apply the special cases where
2063 * the eeprom may be wrong or the board simply won't support wake on
2064 * lan on a particular port */
2065 switch (pdev->device) {
2066 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2067 adapter->eeprom_wol = 0;
2068 break;
2069 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2070 case E1000_DEV_ID_82576_FIBER:
2071 case E1000_DEV_ID_82576_SERDES:
2072 /* Wake events only supported on port A for dual fiber
2073 * regardless of eeprom setting */
2074 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2075 adapter->eeprom_wol = 0;
2076 break;
2077 case E1000_DEV_ID_82576_QUAD_COPPER:
2078 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2079 /* if quad port adapter, disable WoL on all but port A */
2080 if (global_quad_port_a != 0)
2081 adapter->eeprom_wol = 0;
2082 else
2083 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2084 /* Reset for multiple quad port adapters */
2085 if (++global_quad_port_a == 4)
2086 global_quad_port_a = 0;
2087 break;
2088 }
2089
2090 /* initialize the wol settings based on the eeprom settings */
2091 adapter->wol = adapter->eeprom_wol;
2092 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2093
2094 /* reset the hardware with the new settings */
2095 igb_reset(adapter);
2096
2097 /* let the f/w know that the h/w is now under the control of the
2098 * driver. */
2099 igb_get_hw_control(adapter);
2100
2101 strcpy(netdev->name, "eth%d");
2102 err = register_netdev(netdev);
2103 if (err)
2104 goto err_register;
2105
2106 /* carrier off reporting is important to ethtool even BEFORE open */
2107 netif_carrier_off(netdev);
2108
2109 #ifdef CONFIG_IGB_DCA
2110 if (dca_add_requester(&pdev->dev) == 0) {
2111 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2112 dev_info(&pdev->dev, "DCA enabled\n");
2113 igb_setup_dca(adapter);
2114 }
2115
2116 #endif
2117 #ifdef CONFIG_IGB_PTP
2118 /* do hw tstamp init after resetting */
2119 igb_ptp_init(adapter);
2120
2121 #endif
2122 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2123 /* print bus type/speed/width info */
2124 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2125 netdev->name,
2126 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2127 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2128 "unknown"),
2129 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2130 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2131 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2132 "unknown"),
2133 netdev->dev_addr);
2134
2135 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2136 if (ret_val)
2137 strcpy(part_str, "Unknown");
2138 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2139 dev_info(&pdev->dev,
2140 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2141 adapter->msix_entries ? "MSI-X" :
2142 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2143 adapter->num_rx_queues, adapter->num_tx_queues);
2144 switch (hw->mac.type) {
2145 case e1000_i350:
2146 case e1000_i210:
2147 case e1000_i211:
2148 igb_set_eee_i350(hw);
2149 break;
2150 default:
2151 break;
2152 }
2153
2154 pm_runtime_put_noidle(&pdev->dev);
2155 return 0;
2156
2157 err_register:
2158 igb_release_hw_control(adapter);
2159 err_eeprom:
2160 if (!igb_check_reset_block(hw))
2161 igb_reset_phy(hw);
2162
2163 if (hw->flash_address)
2164 iounmap(hw->flash_address);
2165 err_sw_init:
2166 igb_clear_interrupt_scheme(adapter);
2167 iounmap(hw->hw_addr);
2168 err_ioremap:
2169 free_netdev(netdev);
2170 err_alloc_etherdev:
2171 pci_release_selected_regions(pdev,
2172 pci_select_bars(pdev, IORESOURCE_MEM));
2173 err_pci_reg:
2174 err_dma:
2175 pci_disable_device(pdev);
2176 return err;
2177 }
2178
2179 /**
2180 * igb_remove - Device Removal Routine
2181 * @pdev: PCI device information struct
2182 *
2183 * igb_remove is called by the PCI subsystem to alert the driver
2184 * that it should release a PCI device. This could be caused by a
2185 * Hot-Plug event, or because the driver is going to be removed from
2186 * memory.
2187 **/
2188 static void __devexit igb_remove(struct pci_dev *pdev)
2189 {
2190 struct net_device *netdev = pci_get_drvdata(pdev);
2191 struct igb_adapter *adapter = netdev_priv(netdev);
2192 struct e1000_hw *hw = &adapter->hw;
2193
2194 pm_runtime_get_noresume(&pdev->dev);
2195 #ifdef CONFIG_IGB_PTP
2196 igb_ptp_remove(adapter);
2197
2198 #endif
2199 /*
2200 * The watchdog timer may be rescheduled, so explicitly
2201 * prevent the watchdog from being rescheduled.
2202 */
2203 set_bit(__IGB_DOWN, &adapter->state);
2204 del_timer_sync(&adapter->watchdog_timer);
2205 del_timer_sync(&adapter->phy_info_timer);
2206
2207 cancel_work_sync(&adapter->reset_task);
2208 cancel_work_sync(&adapter->watchdog_task);
2209
2210 #ifdef CONFIG_IGB_DCA
2211 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2212 dev_info(&pdev->dev, "DCA disabled\n");
2213 dca_remove_requester(&pdev->dev);
2214 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2215 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2216 }
2217 #endif
2218
2219 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2220 * would have already happened in close and is redundant. */
2221 igb_release_hw_control(adapter);
2222
2223 unregister_netdev(netdev);
2224
2225 igb_clear_interrupt_scheme(adapter);
2226
2227 #ifdef CONFIG_PCI_IOV
2228 /* reclaim resources allocated to VFs */
2229 if (adapter->vf_data) {
2230 /* disable iov and allow time for transactions to clear */
2231 if (!igb_check_vf_assignment(adapter)) {
2232 pci_disable_sriov(pdev);
2233 msleep(500);
2234 } else {
2235 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2236 }
2237
2238 kfree(adapter->vf_data);
2239 adapter->vf_data = NULL;
2240 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2241 wrfl();
2242 msleep(100);
2243 dev_info(&pdev->dev, "IOV Disabled\n");
2244 }
2245 #endif
2246
2247 iounmap(hw->hw_addr);
2248 if (hw->flash_address)
2249 iounmap(hw->flash_address);
2250 pci_release_selected_regions(pdev,
2251 pci_select_bars(pdev, IORESOURCE_MEM));
2252
2253 kfree(adapter->shadow_vfta);
2254 free_netdev(netdev);
2255
2256 pci_disable_pcie_error_reporting(pdev);
2257
2258 pci_disable_device(pdev);
2259 }
2260
2261 /**
2262 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2263 * @adapter: board private structure to initialize
2264 *
2265 * This function initializes the vf specific data storage and then attempts to
2266 * allocate the VFs. The reason for ordering it this way is because it is much
2267 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2268 * the memory for the VFs.
2269 **/
2270 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2271 {
2272 #ifdef CONFIG_PCI_IOV
2273 struct pci_dev *pdev = adapter->pdev;
2274 struct e1000_hw *hw = &adapter->hw;
2275 int old_vfs = igb_find_enabled_vfs(adapter);
2276 int i;
2277
2278 /* Virtualization features not supported on i210 family. */
2279 if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2280 return;
2281
2282 if (old_vfs) {
2283 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2284 "max_vfs setting of %d\n", old_vfs, max_vfs);
2285 adapter->vfs_allocated_count = old_vfs;
2286 }
2287
2288 if (!adapter->vfs_allocated_count)
2289 return;
2290
2291 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2292 sizeof(struct vf_data_storage), GFP_KERNEL);
2293
2294 /* if allocation failed then we do not support SR-IOV */
2295 if (!adapter->vf_data) {
2296 adapter->vfs_allocated_count = 0;
2297 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2298 "Data Storage\n");
2299 goto out;
2300 }
2301
2302 if (!old_vfs) {
2303 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2304 goto err_out;
2305 }
2306 dev_info(&pdev->dev, "%d VFs allocated\n",
2307 adapter->vfs_allocated_count);
2308 for (i = 0; i < adapter->vfs_allocated_count; i++)
2309 igb_vf_configure(adapter, i);
2310
2311 /* DMA Coalescing is not supported in IOV mode. */
2312 adapter->flags &= ~IGB_FLAG_DMAC;
2313 goto out;
2314 err_out:
2315 kfree(adapter->vf_data);
2316 adapter->vf_data = NULL;
2317 adapter->vfs_allocated_count = 0;
2318 out:
2319 return;
2320 #endif /* CONFIG_PCI_IOV */
2321 }
2322
2323 /**
2324 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2325 * @adapter: board private structure to initialize
2326 *
2327 * igb_sw_init initializes the Adapter private data structure.
2328 * Fields are initialized based on PCI device information and
2329 * OS network device settings (MTU size).
2330 **/
2331 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2332 {
2333 struct e1000_hw *hw = &adapter->hw;
2334 struct net_device *netdev = adapter->netdev;
2335 struct pci_dev *pdev = adapter->pdev;
2336 u32 max_rss_queues;
2337
2338 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2339
2340 /* set default ring sizes */
2341 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2342 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2343
2344 /* set default ITR values */
2345 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2346 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2347
2348 /* set default work limits */
2349 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2350
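/* max frame size is the MTU plus Ethernet header, FCS and one VLAN tag */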
2351 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2352 VLAN_HLEN;
2353 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2354
2355 adapter->node = -1;
2356
2357 spin_lock_init(&adapter->stats64_lock);
2358 #ifdef CONFIG_PCI_IOV
2359 switch (hw->mac.type) {
2360 case e1000_82576:
2361 case e1000_i350:
2362 if (max_vfs > 7) {
2363 dev_warn(&pdev->dev,
2364 "Maximum of 7 VFs per PF, using max\n");
2365 adapter->vfs_allocated_count = 7;
2366 } else
2367 adapter->vfs_allocated_count = max_vfs;
2368 break;
2369 default:
2370 break;
2371 }
2372 #endif /* CONFIG_PCI_IOV */
2373
2374 /* Determine the maximum number of RSS queues supported. */
2375 switch (hw->mac.type) {
2376 case e1000_i211:
2377 max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2378 break;
2379 case e1000_82575:
2380 case e1000_i210:
2381 max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2382 break;
2383 case e1000_i350:
2384 /* I350 cannot do RSS and SR-IOV at the same time */
2385 if (!!adapter->vfs_allocated_count) {
2386 max_rss_queues = 1;
2387 break;
2388 }
2389 /* fall through */
2390 case e1000_82576:
2391 if (!!adapter->vfs_allocated_count) {
2392 max_rss_queues = 2;
2393 break;
2394 }
2395 /* fall through */
2396 case e1000_82580:
2397 default:
2398 max_rss_queues = IGB_MAX_RX_QUEUES;
2399 break;
2400 }
2401
2402 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2403
2404 /* Determine if we need to pair queues. */
2405 switch (hw->mac.type) {
2406 case e1000_82575:
2407 case e1000_i211:
2408 /* Device supports enough interrupts without queue pairing. */
2409 break;
2410 case e1000_82576:
2411 /*
2412 * If VFs are going to be allocated with RSS queues then we
2413 * should pair the queues in order to conserve interrupts due
2414 * to limited supply.
2415 */
2416 if ((adapter->rss_queues > 1) &&
2417 (adapter->vfs_allocated_count > 6))
2418 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2419 /* fall through */
2420 case e1000_82580:
2421 case e1000_i350:
2422 case e1000_i210:
2423 default:
2424 /*
2425 * If rss_queues > half of max_rss_queues, pair the queues in
2426 * order to conserve interrupts due to limited supply.
2427 */
2428 if (adapter->rss_queues > (max_rss_queues / 2))
2429 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2430 break;
2431 }
2432
2433 /* Setup and initialize a copy of the hw vlan table array */
2434 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2435 E1000_VLAN_FILTER_TBL_SIZE,
2436 GFP_ATOMIC);
2437
2438 /* This call may decrease the number of queues */
2439 if (igb_init_interrupt_scheme(adapter)) {
2440 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2441 return -ENOMEM;
2442 }
2443
2444 igb_probe_vfs(adapter);
2445
2446 /* Explicitly disable IRQ since the NIC can be in any state. */
2447 igb_irq_disable(adapter);
2448
2449 if (hw->mac.type >= e1000_i350)
2450 adapter->flags &= ~IGB_FLAG_DMAC;
2451
2452 set_bit(__IGB_DOWN, &adapter->state);
2453 return 0;
2454 }
2455
2456 /**
2457 * igb_open - Called when a network interface is made active
2458 * @netdev: network interface device structure
2459 *
2460 * Returns 0 on success, negative value on failure
2461 *
2462 * The open entry point is called when a network interface is made
2463 * active by the system (IFF_UP). At this point all resources needed
2464 * for transmit and receive operations are allocated, the interrupt
2465 * handler is registered with the OS, the watchdog timer is started,
2466 * and the stack is notified that the interface is ready.
2467 **/
2468 static int __igb_open(struct net_device *netdev, bool resuming)
2469 {
2470 struct igb_adapter *adapter = netdev_priv(netdev);
2471 struct e1000_hw *hw = &adapter->hw;
2472 struct pci_dev *pdev = adapter->pdev;
2473 int err;
2474 int i;
2475
2476 /* disallow open during test */
2477 if (test_bit(__IGB_TESTING, &adapter->state)) {
2478 WARN_ON(resuming);
2479 return -EBUSY;
2480 }
2481
2482 if (!resuming)
2483 pm_runtime_get_sync(&pdev->dev);
2484
2485 netif_carrier_off(netdev);
2486
2487 /* allocate transmit descriptors */
2488 err = igb_setup_all_tx_resources(adapter);
2489 if (err)
2490 goto err_setup_tx;
2491
2492 /* allocate receive descriptors */
2493 err = igb_setup_all_rx_resources(adapter);
2494 if (err)
2495 goto err_setup_rx;
2496
2497 igb_power_up_link(adapter);
2498
2499 /* before we allocate an interrupt, we must be ready to handle it.
2500 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2501 * as soon as we call pci_request_irq, so we have to set up our
2502 * clean_rx handler before we do so. */
2503 igb_configure(adapter);
2504
2505 err = igb_request_irq(adapter);
2506 if (err)
2507 goto err_req_irq;
2508
2509 /* From here on the code is the same as igb_up() */
2510 clear_bit(__IGB_DOWN, &adapter->state);
2511
2512 for (i = 0; i < adapter->num_q_vectors; i++)
2513 napi_enable(&(adapter->q_vector[i]->napi));
2514
2515 /* Clear any pending interrupts. */
2516 rd32(E1000_ICR);
2517
2518 igb_irq_enable(adapter);
2519
2520 /* notify VFs that reset has been completed */
2521 if (adapter->vfs_allocated_count) {
2522 u32 reg_data = rd32(E1000_CTRL_EXT);
2523 reg_data |= E1000_CTRL_EXT_PFRSTD;
2524 wr32(E1000_CTRL_EXT, reg_data);
2525 }
2526
2527 netif_tx_start_all_queues(netdev);
2528
2529 if (!resuming)
2530 pm_runtime_put(&pdev->dev);
2531
2532 /* start the watchdog. */
2533 hw->mac.get_link_status = 1;
2534 schedule_work(&adapter->watchdog_task);
2535
2536 return 0;
2537
2538 err_req_irq:
2539 igb_release_hw_control(adapter);
2540 igb_power_down_link(adapter);
2541 igb_free_all_rx_resources(adapter);
2542 err_setup_rx:
2543 igb_free_all_tx_resources(adapter);
2544 err_setup_tx:
2545 igb_reset(adapter);
2546 if (!resuming)
2547 pm_runtime_put(&pdev->dev);
2548
2549 return err;
2550 }
2551
2552 static int igb_open(struct net_device *netdev)
2553 {
2554 return __igb_open(netdev, false);
2555 }
2556
2557 /**
2558 * igb_close - Disables a network interface
2559 * @netdev: network interface device structure
2560 *
2561 * Returns 0, this is not allowed to fail
2562 *
2563 * The close entry point is called when an interface is de-activated
2564 * by the OS. The hardware is still under the driver's control, but
2565 * needs to be disabled. A global MAC reset is issued to stop the
2566 * hardware, and all transmit and receive resources are freed.
2567 **/
2568 static int __igb_close(struct net_device *netdev, bool suspending)
2569 {
2570 struct igb_adapter *adapter = netdev_priv(netdev);
2571 struct pci_dev *pdev = adapter->pdev;
2572
2573 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2574
2575 if (!suspending)
2576 pm_runtime_get_sync(&pdev->dev);
2577
2578 igb_down(adapter);
2579 igb_free_irq(adapter);
2580
2581 igb_free_all_tx_resources(adapter);
2582 igb_free_all_rx_resources(adapter);
2583
2584 if (!suspending)
2585 pm_runtime_put_sync(&pdev->dev);
2586 return 0;
2587 }
2588
2589 static int igb_close(struct net_device *netdev)
2590 {
2591 return __igb_close(netdev, false);
2592 }
2593
2594 /**
2595 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2596 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2597 *
2598 * Return 0 on success, negative on failure
2599 **/
2600 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2601 {
2602 struct device *dev = tx_ring->dev;
2603 int orig_node = dev_to_node(dev);
2604 int size;
2605
2606 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
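/* try to allocate the buffer info array and descriptor ring on the
 * ring's NUMA node first, falling back to any node on failure */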
2607 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2608 if (!tx_ring->tx_buffer_info)
2609 tx_ring->tx_buffer_info = vzalloc(size);
2610 if (!tx_ring->tx_buffer_info)
2611 goto err;
2612
2613 /* round up to nearest 4K */
2614 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2615 tx_ring->size = ALIGN(tx_ring->size, 4096);
2616
2617 set_dev_node(dev, tx_ring->numa_node);
2618 tx_ring->desc = dma_alloc_coherent(dev,
2619 tx_ring->size,
2620 &tx_ring->dma,
2621 GFP_KERNEL);
2622 set_dev_node(dev, orig_node);
2623 if (!tx_ring->desc)
2624 tx_ring->desc = dma_alloc_coherent(dev,
2625 tx_ring->size,
2626 &tx_ring->dma,
2627 GFP_KERNEL);
2628
2629 if (!tx_ring->desc)
2630 goto err;
2631
2632 tx_ring->next_to_use = 0;
2633 tx_ring->next_to_clean = 0;
2634
2635 return 0;
2636
2637 err:
2638 vfree(tx_ring->tx_buffer_info);
2639 dev_err(dev,
2640 "Unable to allocate memory for the transmit descriptor ring\n");
2641 return -ENOMEM;
2642 }
2643
2644 /**
2645 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2646 * (Descriptors) for all queues
2647 * @adapter: board private structure
2648 *
2649 * Return 0 on success, negative on failure
2650 **/
2651 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2652 {
2653 struct pci_dev *pdev = adapter->pdev;
2654 int i, err = 0;
2655
2656 for (i = 0; i < adapter->num_tx_queues; i++) {
2657 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2658 if (err) {
2659 dev_err(&pdev->dev,
2660 "Allocation for Tx Queue %u failed\n", i);
2661 for (i--; i >= 0; i--)
2662 igb_free_tx_resources(adapter->tx_ring[i]);
2663 break;
2664 }
2665 }
2666
2667 return err;
2668 }
2669
2670 /**
2671 * igb_setup_tctl - configure the transmit control registers
2672 * @adapter: Board private structure
2673 **/
2674 void igb_setup_tctl(struct igb_adapter *adapter)
2675 {
2676 struct e1000_hw *hw = &adapter->hw;
2677 u32 tctl;
2678
2679 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2680 wr32(E1000_TXDCTL(0), 0);
2681
2682 /* Program the Transmit Control Register */
2683 tctl = rd32(E1000_TCTL);
2684 tctl &= ~E1000_TCTL_CT;
2685 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2686 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2687
2688 igb_config_collision_dist(hw);
2689
2690 /* Enable transmits */
2691 tctl |= E1000_TCTL_EN;
2692
2693 wr32(E1000_TCTL, tctl);
2694 }
2695
2696 /**
2697 * igb_configure_tx_ring - Configure transmit ring after Reset
2698 * @adapter: board private structure
2699 * @ring: tx ring to configure
2700 *
2701 * Configure a transmit ring after a reset.
2702 **/
2703 void igb_configure_tx_ring(struct igb_adapter *adapter,
2704 struct igb_ring *ring)
2705 {
2706 struct e1000_hw *hw = &adapter->hw;
2707 u32 txdctl = 0;
2708 u64 tdba = ring->dma;
2709 int reg_idx = ring->reg_idx;
2710
2711 /* disable the queue */
2712 wr32(E1000_TXDCTL(reg_idx), 0);
2713 wrfl();
2714 mdelay(10);
2715
2716 wr32(E1000_TDLEN(reg_idx),
2717 ring->count * sizeof(union e1000_adv_tx_desc));
2718 wr32(E1000_TDBAL(reg_idx),
2719 tdba & 0x00000000ffffffffULL);
2720 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2721
2722 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2723 wr32(E1000_TDH(reg_idx), 0);
2724 writel(0, ring->tail);
2725
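/* set the Tx descriptor prefetch, host and write-back thresholds */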
2726 txdctl |= IGB_TX_PTHRESH;
2727 txdctl |= IGB_TX_HTHRESH << 8;
2728 txdctl |= IGB_TX_WTHRESH << 16;
2729
2730 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2731 wr32(E1000_TXDCTL(reg_idx), txdctl);
2732 }
2733
2734 /**
2735 * igb_configure_tx - Configure transmit Unit after Reset
2736 * @adapter: board private structure
2737 *
2738 * Configure the Tx unit of the MAC after a reset.
2739 **/
2740 static void igb_configure_tx(struct igb_adapter *adapter)
2741 {
2742 int i;
2743
2744 for (i = 0; i < adapter->num_tx_queues; i++)
2745 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2746 }
2747
2748 /**
2749 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2750 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2751 *
2752 * Returns 0 on success, negative on failure
2753 **/
2754 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2755 {
2756 struct device *dev = rx_ring->dev;
2757 int orig_node = dev_to_node(dev);
2758 int size, desc_len;
2759
2760 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2761 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2762 if (!rx_ring->rx_buffer_info)
2763 rx_ring->rx_buffer_info = vzalloc(size);
2764 if (!rx_ring->rx_buffer_info)
2765 goto err;
2766
2767 desc_len = sizeof(union e1000_adv_rx_desc);
2768
2769 /* Round up to nearest 4K */
2770 rx_ring->size = rx_ring->count * desc_len;
2771 rx_ring->size = ALIGN(rx_ring->size, 4096);
2772
2773 set_dev_node(dev, rx_ring->numa_node);
2774 rx_ring->desc = dma_alloc_coherent(dev,
2775 rx_ring->size,
2776 &rx_ring->dma,
2777 GFP_KERNEL);
2778 set_dev_node(dev, orig_node);
2779 if (!rx_ring->desc)
2780 rx_ring->desc = dma_alloc_coherent(dev,
2781 rx_ring->size,
2782 &rx_ring->dma,
2783 GFP_KERNEL);
2784
2785 if (!rx_ring->desc)
2786 goto err;
2787
2788 rx_ring->next_to_clean = 0;
2789 rx_ring->next_to_use = 0;
2790
2791 return 0;
2792
2793 err:
2794 vfree(rx_ring->rx_buffer_info);
2795 rx_ring->rx_buffer_info = NULL;
2796 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2797 " ring\n");
2798 return -ENOMEM;
2799 }
2800
2801 /**
2802 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2803 * (Descriptors) for all queues
2804 * @adapter: board private structure
2805 *
2806 * Return 0 on success, negative on failure
2807 **/
2808 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2809 {
2810 struct pci_dev *pdev = adapter->pdev;
2811 int i, err = 0;
2812
2813 for (i = 0; i < adapter->num_rx_queues; i++) {
2814 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2815 if (err) {
2816 dev_err(&pdev->dev,
2817 "Allocation for Rx Queue %u failed\n", i);
2818 for (i--; i >= 0; i--)
2819 igb_free_rx_resources(adapter->rx_ring[i]);
2820 break;
2821 }
2822 }
2823
2824 return err;
2825 }
2826
2827 /**
2828 * igb_setup_mrqc - configure the multiple receive queue control registers
2829 * @adapter: Board private structure
2830 **/
2831 static void igb_setup_mrqc(struct igb_adapter *adapter)
2832 {
2833 struct e1000_hw *hw = &adapter->hw;
2834 u32 mrqc, rxcsum;
2835 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2836 union e1000_reta {
2837 u32 dword;
2838 u8 bytes[4];
2839 } reta;
2840 static const u8 rsshash[40] = {
2841 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2842 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2843 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2844 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2845
2846 /* Fill out hash function seeds */
2847 for (j = 0; j < 10; j++) {
2848 u32 rsskey = rsshash[(j * 4)];
2849 rsskey |= rsshash[(j * 4) + 1] << 8;
2850 rsskey |= rsshash[(j * 4) + 2] << 16;
2851 rsskey |= rsshash[(j * 4) + 3] << 24;
2852 array_wr32(E1000_RSSRK(0), j, rsskey);
2853 }
2854
2855 num_rx_queues = adapter->rss_queues;
2856
2857 if (adapter->vfs_allocated_count) {
2858 /* 82575 and 82576 support 2 RSS queues for VMDq */
2859 switch (hw->mac.type) {
2860 case e1000_i350:
2861 case e1000_82580:
2862 num_rx_queues = 1;
2863 shift = 0;
2864 break;
2865 case e1000_82576:
2866 shift = 3;
2867 num_rx_queues = 2;
2868 break;
2869 case e1000_82575:
2870 shift = 2;
2871 shift2 = 6;
2872 default:
2873 break;
2874 }
2875 } else {
2876 if (hw->mac.type == e1000_82575)
2877 shift = 6;
2878 }
2879
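/* program the 128-entry RSS redirection table, four entries per
 * register write */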
2880 for (j = 0; j < (32 * 4); j++) {
2881 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2882 if (shift2)
2883 reta.bytes[j & 3] |= num_rx_queues << shift2;
2884 if ((j & 3) == 3)
2885 wr32(E1000_RETA(j >> 2), reta.dword);
2886 }
2887
2888 /*
2889 * Disable raw packet checksumming so that RSS hash is placed in
2890 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2891 * offloads as they are enabled by default
2892 */
2893 rxcsum = rd32(E1000_RXCSUM);
2894 rxcsum |= E1000_RXCSUM_PCSD;
2895
2896 if (adapter->hw.mac.type >= e1000_82576)
2897 /* Enable Receive Checksum Offload for SCTP */
2898 rxcsum |= E1000_RXCSUM_CRCOFL;
2899
2900 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2901 wr32(E1000_RXCSUM, rxcsum);
2902 /*
2903 * Generate RSS hash based on TCP port numbers and/or
2904 * IPv4/v6 src and dst addresses since UDP cannot be
2905 * hashed reliably due to IP fragmentation
2906 */
2907
2908 mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2909 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2910 E1000_MRQC_RSS_FIELD_IPV6 |
2911 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2912 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2913
2914 /* If VMDq is enabled then we set the appropriate mode for that, else
2915 * we default to RSS so that an RSS hash is calculated per packet even
2916 * if we are only using one queue */
2917 if (adapter->vfs_allocated_count) {
2918 if (hw->mac.type > e1000_82575) {
2919 /* Set the default pool for the PF's first queue */
2920 u32 vtctl = rd32(E1000_VT_CTL);
2921 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2922 E1000_VT_CTL_DISABLE_DEF_POOL);
2923 vtctl |= adapter->vfs_allocated_count <<
2924 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2925 wr32(E1000_VT_CTL, vtctl);
2926 }
2927 if (adapter->rss_queues > 1)
2928 mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2929 else
2930 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2931 } else {
2932 if (hw->mac.type != e1000_i211)
2933 mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
2934 }
2935 igb_vmm_control(adapter);
2936
2937 wr32(E1000_MRQC, mrqc);
2938 }
2939
2940 /**
2941 * igb_setup_rctl - configure the receive control registers
2942 * @adapter: Board private structure
2943 **/
2944 void igb_setup_rctl(struct igb_adapter *adapter)
2945 {
2946 struct e1000_hw *hw = &adapter->hw;
2947 u32 rctl;
2948
2949 rctl = rd32(E1000_RCTL);
2950
2951 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2952 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2953
2954 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2955 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2956
2957 /*
2958 * enable stripping of CRC. It's unlikely this will break BMC
2959 * redirection as it did with e1000. Newer features require
2960 * that the HW strips the CRC.
2961 */
2962 rctl |= E1000_RCTL_SECRC;
2963
2964 /* disable store bad packets and clear size bits. */
2965 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2966
2967 /* enable LPE to prevent packets larger than max_frame_size */
2968 rctl |= E1000_RCTL_LPE;
2969
2970 /* disable queue 0 to prevent tail write w/o re-config */
2971 wr32(E1000_RXDCTL(0), 0);
2972
2973 /* Attention!!! For SR-IOV PF driver operations you must enable
2974 * queue drop for all VF and PF queues to prevent head-of-line blocking
2975 * if an untrusted VF does not provide descriptors to hardware.
2976 */
2977 if (adapter->vfs_allocated_count) {
2978 /* set all queue drop enable bits */
2979 wr32(E1000_QDE, ALL_QUEUES);
2980 }
2981
2982 /* This is useful for sniffing bad packets. */
2983 if (adapter->netdev->features & NETIF_F_RXALL) {
2984 /* UPE and MPE will be handled by normal PROMISC logic
2985 * in e1000e_set_rx_mode */
2986 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
2987 E1000_RCTL_BAM | /* RX All Bcast Pkts */
2988 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
2989
2990 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
2991 E1000_RCTL_DPF | /* Allow filtered pause */
2992 E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
2993 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
2994 * and that breaks VLANs.
2995 */
2996 }
2997
2998 wr32(E1000_RCTL, rctl);
2999 }
3000
3001 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3002 int vfn)
3003 {
3004 struct e1000_hw *hw = &adapter->hw;
3005 u32 vmolr;
3006
3007 /* if this is a VF rather than the PF, check whether it has VLANs
3008 * enabled and, if so, increase the size to leave room for a VLAN tag */
3009 if (vfn < adapter->vfs_allocated_count &&
3010 adapter->vf_data[vfn].vlans_enabled)
3011 size += VLAN_TAG_SIZE;
3012
3013 vmolr = rd32(E1000_VMOLR(vfn));
3014 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3015 vmolr |= size | E1000_VMOLR_LPE;
3016 wr32(E1000_VMOLR(vfn), vmolr);
3017
3018 return 0;
3019 }
3020
3021 /**
3022 * igb_rlpml_set - set maximum receive packet size
3023 * @adapter: board private structure
3024 *
3025 * Configure maximum receivable packet size.
3026 **/
3027 static void igb_rlpml_set(struct igb_adapter *adapter)
3028 {
3029 u32 max_frame_size = adapter->max_frame_size;
3030 struct e1000_hw *hw = &adapter->hw;
3031 u16 pf_id = adapter->vfs_allocated_count;
3032
3033 if (pf_id) {
3034 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3035 /*
3036 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3037 * to our max jumbo frame size, in case we need to enable
3038 * jumbo frames on one of the rings later.
3039 * This will not pass over-length frames into the default
3040 * queue because it's gated by the VMOLR.RLPML.
3041 */
3042 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3043 }
3044
3045 wr32(E1000_RLPML, max_frame_size);
3046 }
3047
3048 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3049 int vfn, bool aupe)
3050 {
3051 struct e1000_hw *hw = &adapter->hw;
3052 u32 vmolr;
3053
3054 /*
3055 * This register exists only on 82576 and newer, so on older hardware
3056 * we should exit and do nothing
3057 */
3058 if (hw->mac.type < e1000_82576)
3059 return;
3060
3061 vmolr = rd32(E1000_VMOLR(vfn));
3062 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3063 if (aupe)
3064 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3065 else
3066 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3067
3068 /* clear all bits that might not be set */
3069 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3070
3071 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3072 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3073 /*
3074 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3075 * multicast packets
3076 */
3077 if (vfn <= adapter->vfs_allocated_count)
3078 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3079
3080 wr32(E1000_VMOLR(vfn), vmolr);
3081 }
3082
3083 /**
3084 * igb_configure_rx_ring - Configure a receive ring after Reset
3085 * @adapter: board private structure
3086 * @ring: receive ring to be configured
3087 *
3088 * Configure the Rx unit of the MAC after a reset.
3089 **/
3090 void igb_configure_rx_ring(struct igb_adapter *adapter,
3091 struct igb_ring *ring)
3092 {
3093 struct e1000_hw *hw = &adapter->hw;
3094 u64 rdba = ring->dma;
3095 int reg_idx = ring->reg_idx;
3096 u32 srrctl = 0, rxdctl = 0;
3097
3098 /* disable the queue */
3099 wr32(E1000_RXDCTL(reg_idx), 0);
3100
3101 /* Set DMA base address registers */
3102 wr32(E1000_RDBAL(reg_idx),
3103 rdba & 0x00000000ffffffffULL);
3104 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3105 wr32(E1000_RDLEN(reg_idx),
3106 ring->count * sizeof(union e1000_adv_rx_desc));
3107
3108 /* initialize head and tail */
3109 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3110 wr32(E1000_RDH(reg_idx), 0);
3111 writel(0, ring->tail);
3112
3113 /* set descriptor configuration */
3114 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3115 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3116 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3117 #else
3118 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3119 #endif
3120 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3121 if (hw->mac.type >= e1000_82580)
3122 srrctl |= E1000_SRRCTL_TIMESTAMP;
3123 /* Only set Drop Enable if we are supporting multiple queues */
3124 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3125 srrctl |= E1000_SRRCTL_DROP_EN;
3126
3127 wr32(E1000_SRRCTL(reg_idx), srrctl);
3128
3129 /* set filtering for VMDQ pools */
3130 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3131
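/* set the Rx descriptor prefetch, host and write-back thresholds */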
3132 rxdctl |= IGB_RX_PTHRESH;
3133 rxdctl |= IGB_RX_HTHRESH << 8;
3134 rxdctl |= IGB_RX_WTHRESH << 16;
3135
3136 /* enable receive descriptor fetching */
3137 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3138 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3139 }
3140
3141 /**
3142 * igb_configure_rx - Configure receive Unit after Reset
3143 * @adapter: board private structure
3144 *
3145 * Configure the Rx unit of the MAC after a reset.
3146 **/
3147 static void igb_configure_rx(struct igb_adapter *adapter)
3148 {
3149 int i;
3150
3151 /* set UTA to appropriate mode */
3152 igb_set_uta(adapter);
3153
3154 /* set the correct pool for the PF default MAC address in entry 0 */
3155 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3156 adapter->vfs_allocated_count);
3157
3158 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3159 * the Base and Length of the Rx Descriptor Ring */
3160 for (i = 0; i < adapter->num_rx_queues; i++)
3161 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3162 }
3163
3164 /**
3165 * igb_free_tx_resources - Free Tx Resources per Queue
3166 * @tx_ring: Tx descriptor ring for a specific queue
3167 *
3168 * Free all transmit software resources
3169 **/
3170 void igb_free_tx_resources(struct igb_ring *tx_ring)
3171 {
3172 igb_clean_tx_ring(tx_ring);
3173
3174 vfree(tx_ring->tx_buffer_info);
3175 tx_ring->tx_buffer_info = NULL;
3176
3177 /* if not set, then don't free */
3178 if (!tx_ring->desc)
3179 return;
3180
3181 dma_free_coherent(tx_ring->dev, tx_ring->size,
3182 tx_ring->desc, tx_ring->dma);
3183
3184 tx_ring->desc = NULL;
3185 }
3186
3187 /**
3188 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3189 * @adapter: board private structure
3190 *
3191 * Free all transmit software resources
3192 **/
3193 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3194 {
3195 int i;
3196
3197 for (i = 0; i < adapter->num_tx_queues; i++)
3198 igb_free_tx_resources(adapter->tx_ring[i]);
3199 }
3200
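/**
 * igb_unmap_and_free_tx_resource - unmap DMA and free the skb for a Tx buffer
 * @ring: ring the buffer was used with
 * @tx_buffer: buffer to be unmapped and freed
 **/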
3201 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3202 struct igb_tx_buffer *tx_buffer)
3203 {
3204 if (tx_buffer->skb) {
3205 dev_kfree_skb_any(tx_buffer->skb);
3206 if (tx_buffer->dma)
3207 dma_unmap_single(ring->dev,
3208 tx_buffer->dma,
3209 tx_buffer->length,
3210 DMA_TO_DEVICE);
3211 } else if (tx_buffer->dma) {
3212 dma_unmap_page(ring->dev,
3213 tx_buffer->dma,
3214 tx_buffer->length,
3215 DMA_TO_DEVICE);
3216 }
3217 tx_buffer->next_to_watch = NULL;
3218 tx_buffer->skb = NULL;
3219 tx_buffer->dma = 0;
3220 /* buffer_info must be completely set up in the transmit path */
3221 }
3222
3223 /**
3224 * igb_clean_tx_ring - Free Tx Buffers
3225 * @tx_ring: ring to be cleaned
3226 **/
3227 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3228 {
3229 struct igb_tx_buffer *buffer_info;
3230 unsigned long size;
3231 u16 i;
3232
3233 if (!tx_ring->tx_buffer_info)
3234 return;
3235 /* Free all the Tx ring sk_buffs */
3236
3237 for (i = 0; i < tx_ring->count; i++) {
3238 buffer_info = &tx_ring->tx_buffer_info[i];
3239 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3240 }
3241
3242 netdev_tx_reset_queue(txring_txq(tx_ring));
3243
3244 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3245 memset(tx_ring->tx_buffer_info, 0, size);
3246
3247 /* Zero out the descriptor ring */
3248 memset(tx_ring->desc, 0, tx_ring->size);
3249
3250 tx_ring->next_to_use = 0;
3251 tx_ring->next_to_clean = 0;
3252 }
3253
3254 /**
3255 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3256 * @adapter: board private structure
3257 **/
3258 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3259 {
3260 int i;
3261
3262 for (i = 0; i < adapter->num_tx_queues; i++)
3263 igb_clean_tx_ring(adapter->tx_ring[i]);
3264 }
3265
3266 /**
3267 * igb_free_rx_resources - Free Rx Resources
3268 * @rx_ring: ring to clean the resources from
3269 *
3270 * Free all receive software resources
3271 **/
3272 void igb_free_rx_resources(struct igb_ring *rx_ring)
3273 {
3274 igb_clean_rx_ring(rx_ring);
3275
3276 vfree(rx_ring->rx_buffer_info);
3277 rx_ring->rx_buffer_info = NULL;
3278
3279 /* if not set, then don't free */
3280 if (!rx_ring->desc)
3281 return;
3282
3283 dma_free_coherent(rx_ring->dev, rx_ring->size,
3284 rx_ring->desc, rx_ring->dma);
3285
3286 rx_ring->desc = NULL;
3287 }
3288
3289 /**
3290 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3291 * @adapter: board private structure
3292 *
3293 * Free all receive software resources
3294 **/
3295 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3296 {
3297 int i;
3298
3299 for (i = 0; i < adapter->num_rx_queues; i++)
3300 igb_free_rx_resources(adapter->rx_ring[i]);
3301 }
3302
3303 /**
3304 * igb_clean_rx_ring - Free Rx Buffers per Queue
3305 * @rx_ring: ring to free buffers from
3306 **/
3307 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3308 {
3309 unsigned long size;
3310 u16 i;
3311
3312 if (!rx_ring->rx_buffer_info)
3313 return;
3314
3315 /* Free all the Rx ring sk_buffs */
3316 for (i = 0; i < rx_ring->count; i++) {
3317 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3318 if (buffer_info->dma) {
3319 dma_unmap_single(rx_ring->dev,
3320 buffer_info->dma,
3321 IGB_RX_HDR_LEN,
3322 DMA_FROM_DEVICE);
3323 buffer_info->dma = 0;
3324 }
3325
3326 if (buffer_info->skb) {
3327 dev_kfree_skb(buffer_info->skb);
3328 buffer_info->skb = NULL;
3329 }
3330 if (buffer_info->page_dma) {
3331 dma_unmap_page(rx_ring->dev,
3332 buffer_info->page_dma,
3333 PAGE_SIZE / 2,
3334 DMA_FROM_DEVICE);
3335 buffer_info->page_dma = 0;
3336 }
3337 if (buffer_info->page) {
3338 put_page(buffer_info->page);
3339 buffer_info->page = NULL;
3340 buffer_info->page_offset = 0;
3341 }
3342 }
3343
3344 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3345 memset(rx_ring->rx_buffer_info, 0, size);
3346
3347 /* Zero out the descriptor ring */
3348 memset(rx_ring->desc, 0, rx_ring->size);
3349
3350 rx_ring->next_to_clean = 0;
3351 rx_ring->next_to_use = 0;
3352 }
3353
3354 /**
3355 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3356 * @adapter: board private structure
3357 **/
3358 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3359 {
3360 int i;
3361
3362 for (i = 0; i < adapter->num_rx_queues; i++)
3363 igb_clean_rx_ring(adapter->rx_ring[i]);
3364 }
3365
3366 /**
3367 * igb_set_mac - Change the Ethernet Address of the NIC
3368 * @netdev: network interface device structure
3369 * @p: pointer to an address structure
3370 *
3371 * Returns 0 on success, negative on failure
3372 **/
3373 static int igb_set_mac(struct net_device *netdev, void *p)
3374 {
3375 struct igb_adapter *adapter = netdev_priv(netdev);
3376 struct e1000_hw *hw = &adapter->hw;
3377 struct sockaddr *addr = p;
3378
3379 if (!is_valid_ether_addr(addr->sa_data))
3380 return -EADDRNOTAVAIL;
3381
3382 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3383 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3384
3385 /* set the correct pool for the new PF MAC address in entry 0 */
3386 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3387 adapter->vfs_allocated_count);
3388
3389 return 0;
3390 }
3391
3392 /**
3393 * igb_write_mc_addr_list - write multicast addresses to MTA
3394 * @netdev: network interface device structure
3395 *
3396 * Writes multicast address list to the MTA hash table.
3397 * Returns: -ENOMEM on failure
3398 * 0 on no addresses written
3399 * X on writing X addresses to MTA
3400 **/
3401 static int igb_write_mc_addr_list(struct net_device *netdev)
3402 {
3403 struct igb_adapter *adapter = netdev_priv(netdev);
3404 struct e1000_hw *hw = &adapter->hw;
3405 struct netdev_hw_addr *ha;
3406 u8 *mta_list;
3407 int i;
3408
3409 if (netdev_mc_empty(netdev)) {
3410 /* nothing to program, so clear mc list */
3411 igb_update_mc_addr_list(hw, NULL, 0);
3412 igb_restore_vf_multicasts(adapter);
3413 return 0;
3414 }
3415
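/* allocate one ETH_ALEN (6 byte) slot per multicast address */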
3416 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3417 if (!mta_list)
3418 return -ENOMEM;
3419
3420 /* The shared function expects a packed array of only addresses. */
3421 i = 0;
3422 netdev_for_each_mc_addr(ha, netdev)
3423 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3424
3425 igb_update_mc_addr_list(hw, mta_list, i);
3426 kfree(mta_list);
3427
3428 return netdev_mc_count(netdev);
3429 }
3430
3431 /**
3432 * igb_write_uc_addr_list - write unicast addresses to RAR table
3433 * @netdev: network interface device structure
3434 *
3435 * Writes unicast address list to the RAR table.
3436 * Returns: -ENOMEM on failure/insufficient address space
3437 * 0 on no addresses written
3438 * X on writing X addresses to the RAR table
3439 **/
3440 static int igb_write_uc_addr_list(struct net_device *netdev)
3441 {
3442 struct igb_adapter *adapter = netdev_priv(netdev);
3443 struct e1000_hw *hw = &adapter->hw;
3444 unsigned int vfn = adapter->vfs_allocated_count;
3445 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3446 int count = 0;
3447
3448 /* return ENOMEM indicating insufficient memory for addresses */
3449 if (netdev_uc_count(netdev) > rar_entries)
3450 return -ENOMEM;
3451
3452 if (!netdev_uc_empty(netdev) && rar_entries) {
3453 struct netdev_hw_addr *ha;
3454
3455 netdev_for_each_uc_addr(ha, netdev) {
3456 if (!rar_entries)
3457 break;
3458 igb_rar_set_qsel(adapter, ha->addr,
3459 rar_entries--,
3460 vfn);
3461 count++;
3462 }
3463 }
3464 /* write the addresses in reverse order to avoid write combining */
3465 for (; rar_entries > 0 ; rar_entries--) {
3466 wr32(E1000_RAH(rar_entries), 0);
3467 wr32(E1000_RAL(rar_entries), 0);
3468 }
3469 wrfl();
3470
3471 return count;
3472 }
3473
3474 /**
3475 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3476 * @netdev: network interface device structure
3477 *
3478 * The set_rx_mode entry point is called whenever the unicast or multicast
3479 * address lists or the network interface flags are updated. This routine is
3480 * responsible for configuring the hardware for proper unicast, multicast,
3481 * promiscuous mode, and all-multi behavior.
3482 **/
3483 static void igb_set_rx_mode(struct net_device *netdev)
3484 {
3485 struct igb_adapter *adapter = netdev_priv(netdev);
3486 struct e1000_hw *hw = &adapter->hw;
3487 unsigned int vfn = adapter->vfs_allocated_count;
3488 u32 rctl, vmolr = 0;
3489 int count;
3490
3491 /* Check for Promiscuous and All Multicast modes */
3492 rctl = rd32(E1000_RCTL);
3493
3494 /* clear the affected bits */
3495 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3496
3497 if (netdev->flags & IFF_PROMISC) {
3498 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3499 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3500 } else {
3501 if (netdev->flags & IFF_ALLMULTI) {
3502 rctl |= E1000_RCTL_MPE;
3503 vmolr |= E1000_VMOLR_MPME;
3504 } else {
3505 /*
3506 * Write addresses to the MTA, if the attempt fails
3507 * then we should just turn on promiscuous mode so
3508 * that we can at least receive multicast traffic
3509 */
3510 count = igb_write_mc_addr_list(netdev);
3511 if (count < 0) {
3512 rctl |= E1000_RCTL_MPE;
3513 vmolr |= E1000_VMOLR_MPME;
3514 } else if (count) {
3515 vmolr |= E1000_VMOLR_ROMPE;
3516 }
3517 }
3518 /*
3519 * Write addresses to available RAR registers, if there is not
3520 * sufficient space to store all the addresses then enable
3521 * unicast promiscuous mode
3522 */
3523 count = igb_write_uc_addr_list(netdev);
3524 if (count < 0) {
3525 rctl |= E1000_RCTL_UPE;
3526 vmolr |= E1000_VMOLR_ROPE;
3527 }
3528 rctl |= E1000_RCTL_VFE;
3529 }
3530 wr32(E1000_RCTL, rctl);
3531
3532 /*
3533 * In order to support SR-IOV and eventually VMDq it is necessary to set
3534 * the VMOLR to enable the appropriate modes. Without this workaround
3535 * we will have issues with VLAN tag stripping not being done for frames
3536 * that are only arriving because we are the default pool
3537 */
3538 if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3539 return;
3540
3541 vmolr |= rd32(E1000_VMOLR(vfn)) &
3542 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3543 wr32(E1000_VMOLR(vfn), vmolr);
3544 igb_restore_vf_multicasts(adapter);
3545 }
3546
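/* Collect per-VF spoof event bits from the WVBR register so that
 * igb_spoof_check() can report and clear them later.
 */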
3547 static void igb_check_wvbr(struct igb_adapter *adapter)
3548 {
3549 struct e1000_hw *hw = &adapter->hw;
3550 u32 wvbr = 0;
3551
3552 switch (hw->mac.type) {
3553 case e1000_82576:
3554 case e1000_i350:
3555 if (!(wvbr = rd32(E1000_WVBR)))
3556 return;
3557 break;
3558 default:
3559 break;
3560 }
3561
3562 adapter->wvbr |= wvbr;
3563 }
3564
3565 #define IGB_STAGGERED_QUEUE_OFFSET 8
3566
3567 static void igb_spoof_check(struct igb_adapter *adapter)
3568 {
3569 int j;
3570
3571 if (!adapter->wvbr)
3572 return;
3573
3574 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3575 if (adapter->wvbr & (1 << j) ||
3576 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3577 dev_warn(&adapter->pdev->dev,
3578 "Spoof event(s) detected on VF %d\n", j);
3579 adapter->wvbr &=
3580 ~((1 << j) |
3581 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3582 }
3583 }
3584 }
3585
3586 /* Need to wait a few seconds after link up to get diagnostic information from
3587 * the phy */
3588 static void igb_update_phy_info(unsigned long data)
3589 {
3590 struct igb_adapter *adapter = (struct igb_adapter *) data;
3591 igb_get_phy_info(&adapter->hw);
3592 }
3593
3594 /**
3595 * igb_has_link - check shared code for link and determine up/down
3596 * @adapter: pointer to driver private info
3597 **/
3598 bool igb_has_link(struct igb_adapter *adapter)
3599 {
3600 struct e1000_hw *hw = &adapter->hw;
3601 bool link_active = false;
3602 s32 ret_val = 0;
3603
3604 /* get_link_status is set on LSC (link status) interrupt or
3605 * rx sequence error interrupt. get_link_status will stay
3606 * set until e1000_check_for_link establishes link,
3607 * for copper adapters ONLY
3608 */
3609 switch (hw->phy.media_type) {
3610 case e1000_media_type_copper:
3611 if (hw->mac.get_link_status) {
3612 ret_val = hw->mac.ops.check_for_link(hw);
3613 link_active = !hw->mac.get_link_status;
3614 } else {
3615 link_active = true;
3616 }
3617 break;
3618 case e1000_media_type_internal_serdes:
3619 ret_val = hw->mac.ops.check_for_link(hw);
3620 link_active = hw->mac.serdes_has_link;
3621 break;
3622 default:
3623 case e1000_media_type_unknown:
3624 break;
3625 }
3626
3627 return link_active;
3628 }
3629
3630 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3631 {
3632 bool ret = false;
3633 u32 ctrl_ext, thstat;
3634
3635 /* check for thermal sensor event on i350 copper only */
3636 if (hw->mac.type == e1000_i350) {
3637 thstat = rd32(E1000_THSTAT);
3638 ctrl_ext = rd32(E1000_CTRL_EXT);
3639
3640 if ((hw->phy.media_type == e1000_media_type_copper) &&
3641 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3642 ret = !!(thstat & event);
3643 }
3644 }
3645
3646 return ret;
3647 }
3648
3649 /**
3650 * igb_watchdog - Timer Call-back
3651 * @data: pointer to adapter cast into an unsigned long
3652 **/
3653 static void igb_watchdog(unsigned long data)
3654 {
3655 struct igb_adapter *adapter = (struct igb_adapter *)data;
3656 /* Do the rest outside of interrupt context */
3657 schedule_work(&adapter->watchdog_task);
3658 }
3659
3660 static void igb_watchdog_task(struct work_struct *work)
3661 {
3662 struct igb_adapter *adapter = container_of(work,
3663 struct igb_adapter,
3664 watchdog_task);
3665 struct e1000_hw *hw = &adapter->hw;
3666 struct net_device *netdev = adapter->netdev;
3667 u32 link;
3668 int i;
3669
3670 link = igb_has_link(adapter);
3671 if (link) {
3672 /* Cancel scheduled suspend requests. */
3673 pm_runtime_resume(netdev->dev.parent);
3674
3675 if (!netif_carrier_ok(netdev)) {
3676 u32 ctrl;
3677 hw->mac.ops.get_speed_and_duplex(hw,
3678 &adapter->link_speed,
3679 &adapter->link_duplex);
3680
3681 ctrl = rd32(E1000_CTRL);
3682 			/* Link status message must follow this format */
3683 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3684 "Duplex, Flow Control: %s\n",
3685 netdev->name,
3686 adapter->link_speed,
3687 adapter->link_duplex == FULL_DUPLEX ?
3688 "Full" : "Half",
3689 (ctrl & E1000_CTRL_TFCE) &&
3690 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3691 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3692 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3693
3694 /* check for thermal sensor event */
3695 if (igb_thermal_sensor_event(hw,
3696 E1000_THSTAT_LINK_THROTTLE)) {
3697 netdev_info(netdev, "The network adapter link "
3698 "speed was downshifted because it "
3699 "overheated\n");
3700 }
3701
3702 /* adjust timeout factor according to speed/duplex */
3703 adapter->tx_timeout_factor = 1;
3704 switch (adapter->link_speed) {
3705 case SPEED_10:
3706 adapter->tx_timeout_factor = 14;
3707 break;
3708 case SPEED_100:
3709 /* maybe add some timeout factor ? */
3710 break;
3711 }
3712
3713 netif_carrier_on(netdev);
3714
3715 igb_ping_all_vfs(adapter);
3716 igb_check_vf_rate_limit(adapter);
3717
3718 /* link state has changed, schedule phy info update */
3719 if (!test_bit(__IGB_DOWN, &adapter->state))
3720 mod_timer(&adapter->phy_info_timer,
3721 round_jiffies(jiffies + 2 * HZ));
3722 }
3723 } else {
3724 if (netif_carrier_ok(netdev)) {
3725 adapter->link_speed = 0;
3726 adapter->link_duplex = 0;
3727
3728 /* check for thermal sensor event */
3729 if (igb_thermal_sensor_event(hw,
3730 E1000_THSTAT_PWR_DOWN)) {
3731 netdev_err(netdev, "The network adapter was "
3732 "stopped because it overheated\n");
3733 }
3734
3735 			/* Link status message must follow this format */
3736 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3737 netdev->name);
3738 netif_carrier_off(netdev);
3739
3740 igb_ping_all_vfs(adapter);
3741
3742 /* link state has changed, schedule phy info update */
3743 if (!test_bit(__IGB_DOWN, &adapter->state))
3744 mod_timer(&adapter->phy_info_timer,
3745 round_jiffies(jiffies + 2 * HZ));
3746
3747 pm_schedule_suspend(netdev->dev.parent,
3748 MSEC_PER_SEC * 5);
3749 }
3750 }
3751
3752 spin_lock(&adapter->stats64_lock);
3753 igb_update_stats(adapter, &adapter->stats64);
3754 spin_unlock(&adapter->stats64_lock);
3755
3756 for (i = 0; i < adapter->num_tx_queues; i++) {
3757 struct igb_ring *tx_ring = adapter->tx_ring[i];
3758 if (!netif_carrier_ok(netdev)) {
3759 /* We've lost link, so the controller stops DMA,
3760 * but we've got queued Tx work that's never going
3761 * to get done, so reset controller to flush Tx.
3762 * (Do the reset outside of interrupt context). */
3763 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3764 adapter->tx_timeout_count++;
3765 schedule_work(&adapter->reset_task);
3766 /* return immediately since reset is imminent */
3767 return;
3768 }
3769 }
3770
3771 /* Force detection of hung controller every watchdog period */
3772 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3773 }
3774
3775 /* Cause software interrupt to ensure rx ring is cleaned */
3776 if (adapter->msix_entries) {
3777 u32 eics = 0;
3778 for (i = 0; i < adapter->num_q_vectors; i++)
3779 eics |= adapter->q_vector[i]->eims_value;
3780 wr32(E1000_EICS, eics);
3781 } else {
3782 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3783 }
3784
3785 igb_spoof_check(adapter);
3786
3787 /* Reset the timer */
3788 if (!test_bit(__IGB_DOWN, &adapter->state))
3789 mod_timer(&adapter->watchdog_timer,
3790 round_jiffies(jiffies + 2 * HZ));
3791 }
3792
3793 enum latency_range {
3794 lowest_latency = 0,
3795 low_latency = 1,
3796 bulk_latency = 2,
3797 latency_invalid = 255
3798 };
3799
3800 /**
3801  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3802  * @q_vector: pointer to q_vector
3803  *
3804  * Stores a new ITR value based strictly on packet size.  This
3805  * algorithm is less sophisticated than that used in igb_update_itr,
3806  * due to the difficulty of synchronizing statistics across multiple
3807  * receive rings.  The divisors and thresholds used by this function
3808  * were determined based on theoretical maximum wire speed and testing
3809  * data, in order to minimize response time while increasing bulk
3810  * throughput.
3811  * This functionality is controlled by the InterruptThrottleRate module
3812  * parameter (see igb_param.c).
3813  * NOTE: This function is called only when operating in a multiqueue
3814  *       receive environment.
3815  **/
3816 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3817 {
3818 int new_val = q_vector->itr_val;
3819 int avg_wire_size = 0;
3820 struct igb_adapter *adapter = q_vector->adapter;
3821 unsigned int packets;
3822
3823 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3824 * ints/sec - ITR timer value of 120 ticks.
3825 */
3826 if (adapter->link_speed != SPEED_1000) {
3827 new_val = IGB_4K_ITR;
3828 goto set_itr_val;
3829 }
3830
3831 packets = q_vector->rx.total_packets;
3832 if (packets)
3833 avg_wire_size = q_vector->rx.total_bytes / packets;
3834
3835 packets = q_vector->tx.total_packets;
3836 if (packets)
3837 avg_wire_size = max_t(u32, avg_wire_size,
3838 q_vector->tx.total_bytes / packets);
3839
3840 /* if avg_wire_size isn't set no work was done */
3841 if (!avg_wire_size)
3842 goto clear_counts;
3843
3844 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3845 avg_wire_size += 24;
3846
3847 /* Don't starve jumbo frames */
3848 avg_wire_size = min(avg_wire_size, 3000);
3849
3850 /* Give a little boost to mid-size frames */
3851 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3852 new_val = avg_wire_size / 3;
3853 else
3854 new_val = avg_wire_size / 2;
3855
3856 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3857 if (new_val < IGB_20K_ITR &&
3858 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3859 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3860 new_val = IGB_20K_ITR;
3861
3862 set_itr_val:
3863 if (new_val != q_vector->itr_val) {
3864 q_vector->itr_val = new_val;
3865 q_vector->set_itr = 1;
3866 }
3867 clear_counts:
3868 q_vector->rx.total_bytes = 0;
3869 q_vector->rx.total_packets = 0;
3870 q_vector->tx.total_bytes = 0;
3871 q_vector->tx.total_packets = 0;
3872 }
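/* Worked example (illustrative): 100 packets totalling 30000 bytes give
 * avg_wire_size = 300; adding 24 bytes of CRC/preamble/gap yields 324,
 * which falls in the 300-1200 "mid-size" range, so new_val = 324 / 3 = 108.
 * A 9000-byte jumbo workload is first clamped to 3000 and ends up with
 * new_val = 3000 / 2 = 1500.
 */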
3873
3874 /**
3875  * igb_update_itr - update the dynamic ITR value based on statistics
3876  * @q_vector: pointer to q_vector
3877  * @ring_container: ring info to update the itr for
3878  *
3879  * Stores a new ITR value based on packet and byte counts during the last
3880  * interrupt.  The advantage of per-interrupt computation is faster updates
3881  * and a more accurate ITR for the current traffic pattern.  Constants in
3882  * this function were computed based on theoretical maximum wire speed and
3883  * thresholds were set based on testing data as well as attempting to
3884  * minimize response time while increasing bulk throughput.
3885  * This functionality is controlled by the InterruptThrottleRate module
3886  * parameter (see igb_param.c).
3887  * NOTE: These calculations are only valid when operating in a single-
3888  *       queue environment.
3889  **/
3890 static void igb_update_itr(struct igb_q_vector *q_vector,
3891 struct igb_ring_container *ring_container)
3892 {
3893 unsigned int packets = ring_container->total_packets;
3894 unsigned int bytes = ring_container->total_bytes;
3895 u8 itrval = ring_container->itr;
3896
3897 /* no packets, exit with status unchanged */
3898 if (packets == 0)
3899 return;
3900
3901 switch (itrval) {
3902 case lowest_latency:
3903 /* handle TSO and jumbo frames */
3904 if (bytes/packets > 8000)
3905 itrval = bulk_latency;
3906 else if ((packets < 5) && (bytes > 512))
3907 itrval = low_latency;
3908 break;
3909 case low_latency: /* 50 usec aka 20000 ints/s */
3910 if (bytes > 10000) {
3911 /* this if handles the TSO accounting */
3912 if (bytes/packets > 8000) {
3913 itrval = bulk_latency;
3914 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3915 itrval = bulk_latency;
3916 } else if ((packets > 35)) {
3917 itrval = lowest_latency;
3918 }
3919 } else if (bytes/packets > 2000) {
3920 itrval = bulk_latency;
3921 } else if (packets <= 2 && bytes < 512) {
3922 itrval = lowest_latency;
3923 }
3924 break;
3925 case bulk_latency: /* 250 usec aka 4000 ints/s */
3926 if (bytes > 25000) {
3927 if (packets > 35)
3928 itrval = low_latency;
3929 } else if (bytes < 1500) {
3930 itrval = low_latency;
3931 }
3932 break;
3933 }
3934
3935 /* clear work counters since we have the values we need */
3936 ring_container->total_bytes = 0;
3937 ring_container->total_packets = 0;
3938
3939 /* write updated itr to ring container */
3940 ring_container->itr = itrval;
3941 }
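/* Worked example (illustrative): a ring in low_latency that saw 8 packets
 * and 12000 bytes since the last interrupt takes the bytes > 10000 path;
 * 12000/8 = 1500 is not above 8000, but packets < 10 moves it to
 * bulk_latency.  Conversely, 40 small packets totalling 4000 bytes keep
 * bytes <= 10000 and bytes/packets <= 2000, so the ring stays in
 * low_latency.
 */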
3942
3943 static void igb_set_itr(struct igb_q_vector *q_vector)
3944 {
3945 struct igb_adapter *adapter = q_vector->adapter;
3946 u32 new_itr = q_vector->itr_val;
3947 u8 current_itr = 0;
3948
3949 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3950 if (adapter->link_speed != SPEED_1000) {
3951 current_itr = 0;
3952 new_itr = IGB_4K_ITR;
3953 goto set_itr_now;
3954 }
3955
3956 igb_update_itr(q_vector, &q_vector->tx);
3957 igb_update_itr(q_vector, &q_vector->rx);
3958
3959 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3960
3961 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3962 if (current_itr == lowest_latency &&
3963 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3964 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3965 current_itr = low_latency;
3966
3967 switch (current_itr) {
3968 /* counts and packets in update_itr are dependent on these numbers */
3969 case lowest_latency:
3970 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3971 break;
3972 case low_latency:
3973 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3974 break;
3975 case bulk_latency:
3976 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3977 break;
3978 default:
3979 break;
3980 }
3981
3982 set_itr_now:
3983 if (new_itr != q_vector->itr_val) {
3984 /* this attempts to bias the interrupt rate towards Bulk
3985 * by adding intermediate steps when interrupt rate is
3986 * increasing */
3987 new_itr = new_itr > q_vector->itr_val ?
3988 max((new_itr * q_vector->itr_val) /
3989 (new_itr + (q_vector->itr_val >> 2)),
3990 new_itr) :
3991 new_itr;
3992 /* Don't write the value here; it resets the adapter's
3993 * internal timer, and causes us to delay far longer than
3994 * we should between interrupts. Instead, we write the ITR
3995 * value at the beginning of the next interrupt so the timing
3996 * ends up being correct.
3997 */
3998 q_vector->itr_val = new_itr;
3999 q_vector->set_itr = 1;
4000 }
4001 }
4002
4003 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4004 u32 type_tucmd, u32 mss_l4len_idx)
4005 {
4006 struct e1000_adv_tx_context_desc *context_desc;
4007 u16 i = tx_ring->next_to_use;
4008
4009 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4010
4011 i++;
4012 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4013
4014 /* set bits to identify this as an advanced context descriptor */
4015 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4016
4017 /* For 82575, context index must be unique per ring. */
4018 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4019 mss_l4len_idx |= tx_ring->reg_idx << 4;
4020
4021 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4022 context_desc->seqnum_seed = 0;
4023 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4024 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4025 }
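/* Illustration (not part of the driver): on rings flagged with
 * IGB_RING_FLAG_TX_CTX_IDX (82575-class hardware), a ring with reg_idx 3
 * ORs 3 << 4 into mss_l4len_idx so that every ring uses a distinct
 * context index, as noted in the comment above.
 */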
4026
4027 static int igb_tso(struct igb_ring *tx_ring,
4028 struct igb_tx_buffer *first,
4029 u8 *hdr_len)
4030 {
4031 struct sk_buff *skb = first->skb;
4032 u32 vlan_macip_lens, type_tucmd;
4033 u32 mss_l4len_idx, l4len;
4034
4035 if (!skb_is_gso(skb))
4036 return 0;
4037
4038 if (skb_header_cloned(skb)) {
4039 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4040 if (err)
4041 return err;
4042 }
4043
4044 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4045 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4046
4047 if (first->protocol == __constant_htons(ETH_P_IP)) {
4048 struct iphdr *iph = ip_hdr(skb);
4049 iph->tot_len = 0;
4050 iph->check = 0;
4051 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4052 iph->daddr, 0,
4053 IPPROTO_TCP,
4054 0);
4055 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4056 first->tx_flags |= IGB_TX_FLAGS_TSO |
4057 IGB_TX_FLAGS_CSUM |
4058 IGB_TX_FLAGS_IPV4;
4059 } else if (skb_is_gso_v6(skb)) {
4060 ipv6_hdr(skb)->payload_len = 0;
4061 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4062 &ipv6_hdr(skb)->daddr,
4063 0, IPPROTO_TCP, 0);
4064 first->tx_flags |= IGB_TX_FLAGS_TSO |
4065 IGB_TX_FLAGS_CSUM;
4066 }
4067
4068 /* compute header lengths */
4069 l4len = tcp_hdrlen(skb);
4070 *hdr_len = skb_transport_offset(skb) + l4len;
4071
4072 /* update gso size and bytecount with header size */
4073 first->gso_segs = skb_shinfo(skb)->gso_segs;
4074 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4075
4076 /* MSS L4LEN IDX */
4077 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4078 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4079
4080 /* VLAN MACLEN IPLEN */
4081 vlan_macip_lens = skb_network_header_len(skb);
4082 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4083 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4084
4085 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4086
4087 return 1;
4088 }
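/* Worked example (illustrative): a TSO skb with a 1460-byte MSS, three
 * resulting segments and a 54-byte header (14 L2 + 20 IP + 20 TCP) gets
 * first->bytecount increased by (3 - 1) * 54 = 108 so the byte counters
 * account for the replicated headers, and mss_l4len_idx packs
 * (20 << E1000_ADVTXD_L4LEN_SHIFT) | (1460 << E1000_ADVTXD_MSS_SHIFT).
 */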
4089
4090 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4091 {
4092 struct sk_buff *skb = first->skb;
4093 u32 vlan_macip_lens = 0;
4094 u32 mss_l4len_idx = 0;
4095 u32 type_tucmd = 0;
4096
4097 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4098 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4099 return;
4100 } else {
4101 u8 l4_hdr = 0;
4102 switch (first->protocol) {
4103 case __constant_htons(ETH_P_IP):
4104 vlan_macip_lens |= skb_network_header_len(skb);
4105 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4106 l4_hdr = ip_hdr(skb)->protocol;
4107 break;
4108 case __constant_htons(ETH_P_IPV6):
4109 vlan_macip_lens |= skb_network_header_len(skb);
4110 l4_hdr = ipv6_hdr(skb)->nexthdr;
4111 break;
4112 default:
4113 if (unlikely(net_ratelimit())) {
4114 dev_warn(tx_ring->dev,
4115 "partial checksum but proto=%x!\n",
4116 first->protocol);
4117 }
4118 break;
4119 }
4120
4121 switch (l4_hdr) {
4122 case IPPROTO_TCP:
4123 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4124 mss_l4len_idx = tcp_hdrlen(skb) <<
4125 E1000_ADVTXD_L4LEN_SHIFT;
4126 break;
4127 case IPPROTO_SCTP:
4128 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4129 mss_l4len_idx = sizeof(struct sctphdr) <<
4130 E1000_ADVTXD_L4LEN_SHIFT;
4131 break;
4132 case IPPROTO_UDP:
4133 mss_l4len_idx = sizeof(struct udphdr) <<
4134 E1000_ADVTXD_L4LEN_SHIFT;
4135 break;
4136 default:
4137 if (unlikely(net_ratelimit())) {
4138 dev_warn(tx_ring->dev,
4139 "partial checksum but l4 proto=%x!\n",
4140 l4_hdr);
4141 }
4142 break;
4143 }
4144
4145 /* update TX checksum flag */
4146 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4147 }
4148
4149 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4150 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4151
4152 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4153 }
4154
4155 static __le32 igb_tx_cmd_type(u32 tx_flags)
4156 {
4157 /* set type for advanced descriptor with frame checksum insertion */
4158 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4159 E1000_ADVTXD_DCMD_IFCS |
4160 E1000_ADVTXD_DCMD_DEXT);
4161
4162 /* set HW vlan bit if vlan is present */
4163 if (tx_flags & IGB_TX_FLAGS_VLAN)
4164 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4165
4166 /* set timestamp bit if present */
4167 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4168 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4169
4170 /* set segmentation bits for TSO */
4171 if (tx_flags & IGB_TX_FLAGS_TSO)
4172 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4173
4174 return cmd_type;
4175 }
4176
4177 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4178 union e1000_adv_tx_desc *tx_desc,
4179 u32 tx_flags, unsigned int paylen)
4180 {
4181 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4182
4183 /* 82575 requires a unique index per ring if any offload is enabled */
4184 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4185 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4186 olinfo_status |= tx_ring->reg_idx << 4;
4187
4188 /* insert L4 checksum */
4189 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4190 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4191
4192 /* insert IPv4 checksum */
4193 if (tx_flags & IGB_TX_FLAGS_IPV4)
4194 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4195 }
4196
4197 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4198 }
4199
4200 /*
4201 * The largest size we can write to the descriptor is 65535. In order to
4202 * maintain a power of two alignment we have to limit ourselves to 32K.
4203 */
4204 #define IGB_MAX_TXD_PWR 15
4205 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
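/* Illustration (not part of the driver): IGB_MAX_TXD_PWR of 15 means
 * IGB_MAX_DATA_PER_TXD is 1 << 15 = 32768 bytes, so a single 48 KB
 * fragment is split by igb_tx_map() into one 32 KB descriptor followed
 * by one 16 KB descriptor.
 */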
4206
4207 static void igb_tx_map(struct igb_ring *tx_ring,
4208 struct igb_tx_buffer *first,
4209 const u8 hdr_len)
4210 {
4211 struct sk_buff *skb = first->skb;
4212 struct igb_tx_buffer *tx_buffer_info;
4213 union e1000_adv_tx_desc *tx_desc;
4214 dma_addr_t dma;
4215 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4216 unsigned int data_len = skb->data_len;
4217 unsigned int size = skb_headlen(skb);
4218 unsigned int paylen = skb->len - hdr_len;
4219 __le32 cmd_type;
4220 u32 tx_flags = first->tx_flags;
4221 u16 i = tx_ring->next_to_use;
4222
4223 tx_desc = IGB_TX_DESC(tx_ring, i);
4224
4225 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4226 cmd_type = igb_tx_cmd_type(tx_flags);
4227
4228 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4229 if (dma_mapping_error(tx_ring->dev, dma))
4230 goto dma_error;
4231
4232 /* record length, and DMA address */
4233 first->length = size;
4234 first->dma = dma;
4235 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4236
4237 for (;;) {
4238 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4239 tx_desc->read.cmd_type_len =
4240 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4241
4242 i++;
4243 tx_desc++;
4244 if (i == tx_ring->count) {
4245 tx_desc = IGB_TX_DESC(tx_ring, 0);
4246 i = 0;
4247 }
4248
4249 dma += IGB_MAX_DATA_PER_TXD;
4250 size -= IGB_MAX_DATA_PER_TXD;
4251
4252 tx_desc->read.olinfo_status = 0;
4253 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4254 }
4255
4256 if (likely(!data_len))
4257 break;
4258
4259 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4260
4261 i++;
4262 tx_desc++;
4263 if (i == tx_ring->count) {
4264 tx_desc = IGB_TX_DESC(tx_ring, 0);
4265 i = 0;
4266 }
4267
4268 size = skb_frag_size(frag);
4269 data_len -= size;
4270
4271 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4272 size, DMA_TO_DEVICE);
4273 if (dma_mapping_error(tx_ring->dev, dma))
4274 goto dma_error;
4275
4276 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4277 tx_buffer_info->length = size;
4278 tx_buffer_info->dma = dma;
4279
4280 tx_desc->read.olinfo_status = 0;
4281 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4282
4283 frag++;
4284 }
4285
4286 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4287
4288 /* write last descriptor with RS and EOP bits */
4289 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4290 if (unlikely(skb->no_fcs))
4291 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4292 tx_desc->read.cmd_type_len = cmd_type;
4293
4294 /* set the timestamp */
4295 first->time_stamp = jiffies;
4296
4297 /*
4298 * Force memory writes to complete before letting h/w know there
4299 * are new descriptors to fetch. (Only applicable for weak-ordered
4300 * memory model archs, such as IA-64).
4301 *
4302 * We also need this memory barrier to make certain all of the
4303 * status bits have been updated before next_to_watch is written.
4304 */
4305 wmb();
4306
4307 /* set next_to_watch value indicating a packet is present */
4308 first->next_to_watch = tx_desc;
4309
4310 i++;
4311 if (i == tx_ring->count)
4312 i = 0;
4313
4314 tx_ring->next_to_use = i;
4315
4316 writel(i, tx_ring->tail);
4317
4318 	/* we need this if more than one processor can write to our tail
4319 	 * at a time; it synchronizes IO on IA64/Altix systems */
4320 mmiowb();
4321
4322 return;
4323
4324 dma_error:
4325 dev_err(tx_ring->dev, "TX DMA map failed\n");
4326
4327 /* clear dma mappings for failed tx_buffer_info map */
4328 for (;;) {
4329 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4330 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4331 if (tx_buffer_info == first)
4332 break;
4333 if (i == 0)
4334 i = tx_ring->count;
4335 i--;
4336 }
4337
4338 tx_ring->next_to_use = i;
4339 }
4340
4341 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4342 {
4343 struct net_device *netdev = tx_ring->netdev;
4344
4345 netif_stop_subqueue(netdev, tx_ring->queue_index);
4346
4347 /* Herbert's original patch had:
4348 * smp_mb__after_netif_stop_queue();
4349 * but since that doesn't exist yet, just open code it. */
4350 smp_mb();
4351
4352 	/* We need to check again in case another CPU has just
4353 	 * made room available. */
4354 if (igb_desc_unused(tx_ring) < size)
4355 return -EBUSY;
4356
4357 /* A reprieve! */
4358 netif_wake_subqueue(netdev, tx_ring->queue_index);
4359
4360 u64_stats_update_begin(&tx_ring->tx_syncp2);
4361 tx_ring->tx_stats.restart_queue2++;
4362 u64_stats_update_end(&tx_ring->tx_syncp2);
4363
4364 return 0;
4365 }
4366
4367 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4368 {
4369 if (igb_desc_unused(tx_ring) >= size)
4370 return 0;
4371 return __igb_maybe_stop_tx(tx_ring, size);
4372 }
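/* Worked example (illustrative): igb_xmit_frame_ring() reserves
 * nr_frags + 4 descriptors, so an skb with 3 page fragments needs
 * 3 + 1 (skb->data) + 1 (context descriptor) + 2 (gap between tail and
 * head) = 7 free descriptors before transmission is attempted.
 */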
4373
4374 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4375 struct igb_ring *tx_ring)
4376 {
4377 struct igb_tx_buffer *first;
4378 int tso;
4379 u32 tx_flags = 0;
4380 __be16 protocol = vlan_get_protocol(skb);
4381 u8 hdr_len = 0;
4382
4383 /* need: 1 descriptor per page,
4384 * + 2 desc gap to keep tail from touching head,
4385 * + 1 desc for skb->data,
4386 * + 1 desc for context descriptor,
4387 * otherwise try next time */
4388 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4389 /* this is a hard error */
4390 return NETDEV_TX_BUSY;
4391 }
4392
4393 /* record the location of the first descriptor for this packet */
4394 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4395 first->skb = skb;
4396 first->bytecount = skb->len;
4397 first->gso_segs = 1;
4398
4399 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4400 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4401 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4402 }
4403
4404 if (vlan_tx_tag_present(skb)) {
4405 tx_flags |= IGB_TX_FLAGS_VLAN;
4406 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4407 }
4408
4409 /* record initial flags and protocol */
4410 first->tx_flags = tx_flags;
4411 first->protocol = protocol;
4412
4413 tso = igb_tso(tx_ring, first, &hdr_len);
4414 if (tso < 0)
4415 goto out_drop;
4416 else if (!tso)
4417 igb_tx_csum(tx_ring, first);
4418
4419 igb_tx_map(tx_ring, first, hdr_len);
4420
4421 /* Make sure there is space in the ring for the next send. */
4422 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4423
4424 return NETDEV_TX_OK;
4425
4426 out_drop:
4427 igb_unmap_and_free_tx_resource(tx_ring, first);
4428
4429 return NETDEV_TX_OK;
4430 }
4431
4432 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4433 struct sk_buff *skb)
4434 {
4435 unsigned int r_idx = skb->queue_mapping;
4436
4437 if (r_idx >= adapter->num_tx_queues)
4438 r_idx = r_idx % adapter->num_tx_queues;
4439
4440 return adapter->tx_ring[r_idx];
4441 }
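/* Illustration (not part of the driver): with 4 Tx queues configured, an
 * skb whose queue_mapping is 6 is folded back onto ring 6 % 4 = 2.
 */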
4442
4443 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4444 struct net_device *netdev)
4445 {
4446 struct igb_adapter *adapter = netdev_priv(netdev);
4447
4448 if (test_bit(__IGB_DOWN, &adapter->state)) {
4449 dev_kfree_skb_any(skb);
4450 return NETDEV_TX_OK;
4451 }
4452
4453 if (skb->len <= 0) {
4454 dev_kfree_skb_any(skb);
4455 return NETDEV_TX_OK;
4456 }
4457
4458 /*
4459 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4460 * in order to meet this minimum size requirement.
4461 */
4462 if (skb->len < 17) {
4463 if (skb_padto(skb, 17))
4464 return NETDEV_TX_OK;
4465 skb->len = 17;
4466 }
4467
4468 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4469 }
4470
4471 /**
4472 * igb_tx_timeout - Respond to a Tx Hang
4473 * @netdev: network interface device structure
4474 **/
4475 static void igb_tx_timeout(struct net_device *netdev)
4476 {
4477 struct igb_adapter *adapter = netdev_priv(netdev);
4478 struct e1000_hw *hw = &adapter->hw;
4479
4480 /* Do the reset outside of interrupt context */
4481 adapter->tx_timeout_count++;
4482
4483 if (hw->mac.type >= e1000_82580)
4484 hw->dev_spec._82575.global_device_reset = true;
4485
4486 schedule_work(&adapter->reset_task);
4487 wr32(E1000_EICS,
4488 (adapter->eims_enable_mask & ~adapter->eims_other));
4489 }
4490
4491 static void igb_reset_task(struct work_struct *work)
4492 {
4493 struct igb_adapter *adapter;
4494 adapter = container_of(work, struct igb_adapter, reset_task);
4495
4496 igb_dump(adapter);
4497 netdev_err(adapter->netdev, "Reset adapter\n");
4498 igb_reinit_locked(adapter);
4499 }
4500
4501 /**
4502 * igb_get_stats64 - Get System Network Statistics
4503 * @netdev: network interface device structure
4504 * @stats: rtnl_link_stats64 pointer
4505 *
4506 **/
4507 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4508 struct rtnl_link_stats64 *stats)
4509 {
4510 struct igb_adapter *adapter = netdev_priv(netdev);
4511
4512 spin_lock(&adapter->stats64_lock);
4513 igb_update_stats(adapter, &adapter->stats64);
4514 memcpy(stats, &adapter->stats64, sizeof(*stats));
4515 spin_unlock(&adapter->stats64_lock);
4516
4517 return stats;
4518 }
4519
4520 /**
4521 * igb_change_mtu - Change the Maximum Transfer Unit
4522 * @netdev: network interface device structure
4523 * @new_mtu: new value for maximum frame size
4524 *
4525 * Returns 0 on success, negative on failure
4526 **/
4527 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4528 {
4529 struct igb_adapter *adapter = netdev_priv(netdev);
4530 struct pci_dev *pdev = adapter->pdev;
4531 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4532
4533 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4534 dev_err(&pdev->dev, "Invalid MTU setting\n");
4535 return -EINVAL;
4536 }
4537
4538 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4539 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4540 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4541 return -EINVAL;
4542 }
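/* Worked example (illustrative): a standard MTU of 1500 gives
 * max_frame = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) =
 * 1522 bytes, while MTU 9216 corresponds to the 9238-byte
 * MAX_STD_JUMBO_FRAME_SIZE limit checked above.
 */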
4543
4544 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4545 msleep(1);
4546
4547 /* igb_down has a dependency on max_frame_size */
4548 adapter->max_frame_size = max_frame;
4549
4550 if (netif_running(netdev))
4551 igb_down(adapter);
4552
4553 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4554 netdev->mtu, new_mtu);
4555 netdev->mtu = new_mtu;
4556
4557 if (netif_running(netdev))
4558 igb_up(adapter);
4559 else
4560 igb_reset(adapter);
4561
4562 clear_bit(__IGB_RESETTING, &adapter->state);
4563
4564 return 0;
4565 }
4566
4567 /**
4568 * igb_update_stats - Update the board statistics counters
4569 * @adapter: board private structure
4570 **/
4571
4572 void igb_update_stats(struct igb_adapter *adapter,
4573 struct rtnl_link_stats64 *net_stats)
4574 {
4575 struct e1000_hw *hw = &adapter->hw;
4576 struct pci_dev *pdev = adapter->pdev;
4577 u32 reg, mpc;
4578 u16 phy_tmp;
4579 int i;
4580 u64 bytes, packets;
4581 unsigned int start;
4582 u64 _bytes, _packets;
4583
4584 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4585
4586 /*
4587 * Prevent stats update while adapter is being reset, or if the pci
4588 * connection is down.
4589 */
4590 if (adapter->link_speed == 0)
4591 return;
4592 if (pci_channel_offline(pdev))
4593 return;
4594
4595 bytes = 0;
4596 packets = 0;
4597 for (i = 0; i < adapter->num_rx_queues; i++) {
4598 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4599 struct igb_ring *ring = adapter->rx_ring[i];
4600
4601 ring->rx_stats.drops += rqdpc_tmp;
4602 net_stats->rx_fifo_errors += rqdpc_tmp;
4603
4604 do {
4605 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4606 _bytes = ring->rx_stats.bytes;
4607 _packets = ring->rx_stats.packets;
4608 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4609 bytes += _bytes;
4610 packets += _packets;
4611 }
4612
4613 net_stats->rx_bytes = bytes;
4614 net_stats->rx_packets = packets;
4615
4616 bytes = 0;
4617 packets = 0;
4618 for (i = 0; i < adapter->num_tx_queues; i++) {
4619 struct igb_ring *ring = adapter->tx_ring[i];
4620 do {
4621 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4622 _bytes = ring->tx_stats.bytes;
4623 _packets = ring->tx_stats.packets;
4624 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4625 bytes += _bytes;
4626 packets += _packets;
4627 }
4628 net_stats->tx_bytes = bytes;
4629 net_stats->tx_packets = packets;
4630
4631 /* read stats registers */
4632 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4633 adapter->stats.gprc += rd32(E1000_GPRC);
4634 adapter->stats.gorc += rd32(E1000_GORCL);
4635 rd32(E1000_GORCH); /* clear GORCL */
4636 adapter->stats.bprc += rd32(E1000_BPRC);
4637 adapter->stats.mprc += rd32(E1000_MPRC);
4638 adapter->stats.roc += rd32(E1000_ROC);
4639
4640 adapter->stats.prc64 += rd32(E1000_PRC64);
4641 adapter->stats.prc127 += rd32(E1000_PRC127);
4642 adapter->stats.prc255 += rd32(E1000_PRC255);
4643 adapter->stats.prc511 += rd32(E1000_PRC511);
4644 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4645 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4646 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4647 adapter->stats.sec += rd32(E1000_SEC);
4648
4649 mpc = rd32(E1000_MPC);
4650 adapter->stats.mpc += mpc;
4651 net_stats->rx_fifo_errors += mpc;
4652 adapter->stats.scc += rd32(E1000_SCC);
4653 adapter->stats.ecol += rd32(E1000_ECOL);
4654 adapter->stats.mcc += rd32(E1000_MCC);
4655 adapter->stats.latecol += rd32(E1000_LATECOL);
4656 adapter->stats.dc += rd32(E1000_DC);
4657 adapter->stats.rlec += rd32(E1000_RLEC);
4658 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4659 adapter->stats.xontxc += rd32(E1000_XONTXC);
4660 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4661 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4662 adapter->stats.fcruc += rd32(E1000_FCRUC);
4663 adapter->stats.gptc += rd32(E1000_GPTC);
4664 adapter->stats.gotc += rd32(E1000_GOTCL);
4665 rd32(E1000_GOTCH); /* clear GOTCL */
4666 adapter->stats.rnbc += rd32(E1000_RNBC);
4667 adapter->stats.ruc += rd32(E1000_RUC);
4668 adapter->stats.rfc += rd32(E1000_RFC);
4669 adapter->stats.rjc += rd32(E1000_RJC);
4670 adapter->stats.tor += rd32(E1000_TORH);
4671 adapter->stats.tot += rd32(E1000_TOTH);
4672 adapter->stats.tpr += rd32(E1000_TPR);
4673
4674 adapter->stats.ptc64 += rd32(E1000_PTC64);
4675 adapter->stats.ptc127 += rd32(E1000_PTC127);
4676 adapter->stats.ptc255 += rd32(E1000_PTC255);
4677 adapter->stats.ptc511 += rd32(E1000_PTC511);
4678 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4679 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4680
4681 adapter->stats.mptc += rd32(E1000_MPTC);
4682 adapter->stats.bptc += rd32(E1000_BPTC);
4683
4684 adapter->stats.tpt += rd32(E1000_TPT);
4685 adapter->stats.colc += rd32(E1000_COLC);
4686
4687 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4688 /* read internal phy specific stats */
4689 reg = rd32(E1000_CTRL_EXT);
4690 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4691 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4692 adapter->stats.tncrs += rd32(E1000_TNCRS);
4693 }
4694
4695 adapter->stats.tsctc += rd32(E1000_TSCTC);
4696 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4697
4698 adapter->stats.iac += rd32(E1000_IAC);
4699 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4700 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4701 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4702 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4703 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4704 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4705 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4706 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4707
4708 /* Fill out the OS statistics structure */
4709 net_stats->multicast = adapter->stats.mprc;
4710 net_stats->collisions = adapter->stats.colc;
4711
4712 /* Rx Errors */
4713
4714 /* RLEC on some newer hardware can be incorrect so build
4715 * our own version based on RUC and ROC */
4716 net_stats->rx_errors = adapter->stats.rxerrc +
4717 adapter->stats.crcerrs + adapter->stats.algnerrc +
4718 adapter->stats.ruc + adapter->stats.roc +
4719 adapter->stats.cexterr;
4720 net_stats->rx_length_errors = adapter->stats.ruc +
4721 adapter->stats.roc;
4722 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4723 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4724 net_stats->rx_missed_errors = adapter->stats.mpc;
4725
4726 /* Tx Errors */
4727 net_stats->tx_errors = adapter->stats.ecol +
4728 adapter->stats.latecol;
4729 net_stats->tx_aborted_errors = adapter->stats.ecol;
4730 net_stats->tx_window_errors = adapter->stats.latecol;
4731 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4732
4733 /* Tx Dropped needs to be maintained elsewhere */
4734
4735 /* Phy Stats */
4736 if (hw->phy.media_type == e1000_media_type_copper) {
4737 if ((adapter->link_speed == SPEED_1000) &&
4738 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4739 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4740 adapter->phy_stats.idle_errors += phy_tmp;
4741 }
4742 }
4743
4744 /* Management Stats */
4745 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4746 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4747 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4748
4749 /* OS2BMC Stats */
4750 reg = rd32(E1000_MANC);
4751 if (reg & E1000_MANC_EN_BMC2OS) {
4752 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4753 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4754 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4755 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4756 }
4757 }
4758
4759 static irqreturn_t igb_msix_other(int irq, void *data)
4760 {
4761 struct igb_adapter *adapter = data;
4762 struct e1000_hw *hw = &adapter->hw;
4763 u32 icr = rd32(E1000_ICR);
4764 /* reading ICR causes bit 31 of EICR to be cleared */
4765
4766 if (icr & E1000_ICR_DRSTA)
4767 schedule_work(&adapter->reset_task);
4768
4769 if (icr & E1000_ICR_DOUTSYNC) {
4770 /* HW is reporting DMA is out of sync */
4771 adapter->stats.doosync++;
4772 		/* The DMA Out of Sync is also an indication of a spoof event
4773 		 * in IOV mode. Check the Wrong VM Behavior register to
4774 		 * see if it is really a spoof event. */
4775 igb_check_wvbr(adapter);
4776 }
4777
4778 /* Check for a mailbox event */
4779 if (icr & E1000_ICR_VMMB)
4780 igb_msg_task(adapter);
4781
4782 if (icr & E1000_ICR_LSC) {
4783 hw->mac.get_link_status = 1;
4784 /* guard against interrupt when we're going down */
4785 if (!test_bit(__IGB_DOWN, &adapter->state))
4786 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4787 }
4788
4789 wr32(E1000_EIMS, adapter->eims_other);
4790
4791 return IRQ_HANDLED;
4792 }
4793
4794 static void igb_write_itr(struct igb_q_vector *q_vector)
4795 {
4796 struct igb_adapter *adapter = q_vector->adapter;
4797 u32 itr_val = q_vector->itr_val & 0x7FFC;
4798
4799 if (!q_vector->set_itr)
4800 return;
4801
4802 if (!itr_val)
4803 itr_val = 0x4;
4804
4805 if (adapter->hw.mac.type == e1000_82575)
4806 itr_val |= itr_val << 16;
4807 else
4808 itr_val |= E1000_EITR_CNT_IGNR;
4809
4810 writel(itr_val, q_vector->itr_register);
4811 q_vector->set_itr = 0;
4812 }
4813
4814 static irqreturn_t igb_msix_ring(int irq, void *data)
4815 {
4816 struct igb_q_vector *q_vector = data;
4817
4818 /* Write the ITR value calculated from the previous interrupt. */
4819 igb_write_itr(q_vector);
4820
4821 napi_schedule(&q_vector->napi);
4822
4823 return IRQ_HANDLED;
4824 }
4825
4826 #ifdef CONFIG_IGB_DCA
4827 static void igb_update_dca(struct igb_q_vector *q_vector)
4828 {
4829 struct igb_adapter *adapter = q_vector->adapter;
4830 struct e1000_hw *hw = &adapter->hw;
4831 int cpu = get_cpu();
4832
4833 if (q_vector->cpu == cpu)
4834 goto out_no_update;
4835
4836 if (q_vector->tx.ring) {
4837 int q = q_vector->tx.ring->reg_idx;
4838 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4839 if (hw->mac.type == e1000_82575) {
4840 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4841 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4842 } else {
4843 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4844 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4845 E1000_DCA_TXCTRL_CPUID_SHIFT;
4846 }
4847 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4848 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4849 }
4850 if (q_vector->rx.ring) {
4851 int q = q_vector->rx.ring->reg_idx;
4852 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4853 if (hw->mac.type == e1000_82575) {
4854 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4855 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4856 } else {
4857 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4858 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4859 E1000_DCA_RXCTRL_CPUID_SHIFT;
4860 }
4861 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4862 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4863 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4864 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4865 }
4866 q_vector->cpu = cpu;
4867 out_no_update:
4868 put_cpu();
4869 }
4870
4871 static void igb_setup_dca(struct igb_adapter *adapter)
4872 {
4873 struct e1000_hw *hw = &adapter->hw;
4874 int i;
4875
4876 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4877 return;
4878
4879 /* Always use CB2 mode, difference is masked in the CB driver. */
4880 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4881
4882 for (i = 0; i < adapter->num_q_vectors; i++) {
4883 adapter->q_vector[i]->cpu = -1;
4884 igb_update_dca(adapter->q_vector[i]);
4885 }
4886 }
4887
4888 static int __igb_notify_dca(struct device *dev, void *data)
4889 {
4890 struct net_device *netdev = dev_get_drvdata(dev);
4891 struct igb_adapter *adapter = netdev_priv(netdev);
4892 struct pci_dev *pdev = adapter->pdev;
4893 struct e1000_hw *hw = &adapter->hw;
4894 unsigned long event = *(unsigned long *)data;
4895
4896 switch (event) {
4897 case DCA_PROVIDER_ADD:
4898 /* if already enabled, don't do it again */
4899 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4900 break;
4901 if (dca_add_requester(dev) == 0) {
4902 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4903 dev_info(&pdev->dev, "DCA enabled\n");
4904 igb_setup_dca(adapter);
4905 break;
4906 }
4907 /* Fall Through since DCA is disabled. */
4908 case DCA_PROVIDER_REMOVE:
4909 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4910 /* without this a class_device is left
4911 * hanging around in the sysfs model */
4912 dca_remove_requester(dev);
4913 dev_info(&pdev->dev, "DCA disabled\n");
4914 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4915 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4916 }
4917 break;
4918 }
4919
4920 return 0;
4921 }
4922
4923 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4924 void *p)
4925 {
4926 int ret_val;
4927
4928 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4929 __igb_notify_dca);
4930
4931 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4932 }
4933 #endif /* CONFIG_IGB_DCA */
4934
4935 #ifdef CONFIG_PCI_IOV
4936 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4937 {
4938 unsigned char mac_addr[ETH_ALEN];
4939 struct pci_dev *pdev = adapter->pdev;
4940 struct e1000_hw *hw = &adapter->hw;
4941 struct pci_dev *pvfdev;
4942 unsigned int device_id;
4943 u16 thisvf_devfn;
4944
4945 random_ether_addr(mac_addr);
4946 igb_set_vf_mac(adapter, vf, mac_addr);
4947
4948 switch (adapter->hw.mac.type) {
4949 case e1000_82576:
4950 device_id = IGB_82576_VF_DEV_ID;
4951 /* VF Stride for 82576 is 2 */
4952 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4953 (pdev->devfn & 1);
4954 break;
4955 case e1000_i350:
4956 device_id = IGB_I350_VF_DEV_ID;
4957 /* VF Stride for I350 is 4 */
4958 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4959 (pdev->devfn & 3);
4960 break;
4961 default:
4962 device_id = 0;
4963 thisvf_devfn = 0;
4964 break;
4965 }
4966
4967 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4968 while (pvfdev) {
4969 if (pvfdev->devfn == thisvf_devfn)
4970 break;
4971 pvfdev = pci_get_device(hw->vendor_id,
4972 device_id, pvfdev);
4973 }
4974
4975 if (pvfdev)
4976 adapter->vf_data[vf].vfdev = pvfdev;
4977 else
4978 dev_err(&pdev->dev,
4979 "Couldn't find pci dev ptr for VF %4.4x\n",
4980 thisvf_devfn);
4981 return pvfdev != NULL;
4982 }
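/* Worked example (illustrative): for an 82576 PF at devfn 0, the stride-2
 * layout used above places VF 3 at devfn 0x80 + (3 << 1) = 0x86; on i350
 * the stride is 4, so the same VF would sit at 0x80 + (3 << 2) = 0x8c.
 */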
4983
4984 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4985 {
4986 struct e1000_hw *hw = &adapter->hw;
4987 struct pci_dev *pdev = adapter->pdev;
4988 struct pci_dev *pvfdev;
4989 u16 vf_devfn = 0;
4990 u16 vf_stride;
4991 unsigned int device_id;
4992 int vfs_found = 0;
4993
4994 switch (adapter->hw.mac.type) {
4995 case e1000_82576:
4996 device_id = IGB_82576_VF_DEV_ID;
4997 /* VF Stride for 82576 is 2 */
4998 vf_stride = 2;
4999 break;
5000 case e1000_i350:
5001 device_id = IGB_I350_VF_DEV_ID;
5002 /* VF Stride for I350 is 4 */
5003 vf_stride = 4;
5004 break;
5005 default:
5006 device_id = 0;
5007 vf_stride = 0;
5008 break;
5009 }
5010
5011 vf_devfn = pdev->devfn + 0x80;
5012 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5013 while (pvfdev) {
5014 if (pvfdev->devfn == vf_devfn &&
5015 (pvfdev->bus->number >= pdev->bus->number))
5016 vfs_found++;
5017 vf_devfn += vf_stride;
5018 pvfdev = pci_get_device(hw->vendor_id,
5019 device_id, pvfdev);
5020 }
5021
5022 return vfs_found;
5023 }
5024
5025 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5026 {
5027 int i;
5028 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5029 if (adapter->vf_data[i].vfdev) {
5030 if (adapter->vf_data[i].vfdev->dev_flags &
5031 PCI_DEV_FLAGS_ASSIGNED)
5032 return true;
5033 }
5034 }
5035 return false;
5036 }
5037
5038 #endif
5039 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5040 {
5041 struct e1000_hw *hw = &adapter->hw;
5042 u32 ping;
5043 int i;
5044
5045 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5046 ping = E1000_PF_CONTROL_MSG;
5047 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5048 ping |= E1000_VT_MSGTYPE_CTS;
5049 igb_write_mbx(hw, &ping, 1, i);
5050 }
5051 }
5052
5053 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5054 {
5055 struct e1000_hw *hw = &adapter->hw;
5056 u32 vmolr = rd32(E1000_VMOLR(vf));
5057 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5058
5059 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5060 IGB_VF_FLAG_MULTI_PROMISC);
5061 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5062
5063 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5064 vmolr |= E1000_VMOLR_MPME;
5065 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5066 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5067 } else {
5068 /*
5069 * if we have hashes and we are clearing a multicast promisc
5070 * flag we need to write the hashes to the MTA as this step
5071 * was previously skipped
5072 */
5073 if (vf_data->num_vf_mc_hashes > 30) {
5074 vmolr |= E1000_VMOLR_MPME;
5075 } else if (vf_data->num_vf_mc_hashes) {
5076 int j;
5077 vmolr |= E1000_VMOLR_ROMPE;
5078 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5079 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5080 }
5081 }
5082
5083 wr32(E1000_VMOLR(vf), vmolr);
5084
5085 /* there are flags left unprocessed, likely not supported */
5086 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5087 return -EINVAL;
5088
5089 return 0;
5090
5091 }
5092
5093 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5094 u32 *msgbuf, u32 vf)
5095 {
5096 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5097 u16 *hash_list = (u16 *)&msgbuf[1];
5098 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5099 int i;
5100
5101 	/* salt away the number of multicast addresses assigned
5102 	 * to this VF for later use to restore when the PF multicast
5103 	 * list changes
5104 	 */
5105 vf_data->num_vf_mc_hashes = n;
5106
5107 /* only up to 30 hash values supported */
5108 if (n > 30)
5109 n = 30;
5110
5111 /* store the hashes for later use */
5112 for (i = 0; i < n; i++)
5113 vf_data->vf_mc_hashes[i] = hash_list[i];
5114
5115 /* Flush and reset the mta with the new values */
5116 igb_set_rx_mode(adapter->netdev);
5117
5118 return 0;
5119 }
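/* Illustration (assumed mailbox layout, matching the parsing above): a VF
 * registering three multicast addresses encodes the count 3 in the
 * E1000_VT_MSGINFO field of msgbuf[0] and packs the three MTA hash values
 * as 16-bit words starting at msgbuf[1]; only the first 30 hashes are
 * retained by the PF.
 */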
5120
5121 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5122 {
5123 struct e1000_hw *hw = &adapter->hw;
5124 struct vf_data_storage *vf_data;
5125 int i, j;
5126
5127 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5128 u32 vmolr = rd32(E1000_VMOLR(i));
5129 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5130
5131 vf_data = &adapter->vf_data[i];
5132
5133 if ((vf_data->num_vf_mc_hashes > 30) ||
5134 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5135 vmolr |= E1000_VMOLR_MPME;
5136 } else if (vf_data->num_vf_mc_hashes) {
5137 vmolr |= E1000_VMOLR_ROMPE;
5138 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5139 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5140 }
5141 wr32(E1000_VMOLR(i), vmolr);
5142 }
5143 }
5144
5145 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5146 {
5147 struct e1000_hw *hw = &adapter->hw;
5148 u32 pool_mask, reg, vid;
5149 int i;
5150
5151 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5152
5153 /* Find the vlan filter for this id */
5154 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5155 reg = rd32(E1000_VLVF(i));
5156
5157 /* remove the vf from the pool */
5158 reg &= ~pool_mask;
5159
5160 /* if pool is empty then remove entry from vfta */
5161 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5162 (reg & E1000_VLVF_VLANID_ENABLE)) {
5163 			vid = reg & E1000_VLVF_VLANID_MASK;
5164 			reg = 0;
5165 			igb_vfta_set(hw, vid, false);
5166 }
5167
5168 wr32(E1000_VLVF(i), reg);
5169 }
5170
5171 adapter->vf_data[vf].vlans_enabled = 0;
5172 }
5173
5174 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5175 {
5176 struct e1000_hw *hw = &adapter->hw;
5177 u32 reg, i;
5178
5179 /* The vlvf table only exists on 82576 hardware and newer */
5180 if (hw->mac.type < e1000_82576)
5181 return -1;
5182
5183 /* we only need to do this if VMDq is enabled */
5184 if (!adapter->vfs_allocated_count)
5185 return -1;
5186
5187 /* Find the vlan filter for this id */
5188 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5189 reg = rd32(E1000_VLVF(i));
5190 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5191 vid == (reg & E1000_VLVF_VLANID_MASK))
5192 break;
5193 }
5194
5195 if (add) {
5196 if (i == E1000_VLVF_ARRAY_SIZE) {
5197 /* Did not find a matching VLAN ID entry that was
5198 * enabled. Search for a free filter entry, i.e.
5199 * one without the enable bit set
5200 */
5201 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5202 reg = rd32(E1000_VLVF(i));
5203 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5204 break;
5205 }
5206 }
5207 if (i < E1000_VLVF_ARRAY_SIZE) {
5208 /* Found an enabled/available entry */
5209 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5210
5211 /* if !enabled we need to set this up in vfta */
5212 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5213 /* add VID to filter table */
5214 igb_vfta_set(hw, vid, true);
5215 reg |= E1000_VLVF_VLANID_ENABLE;
5216 }
5217 reg &= ~E1000_VLVF_VLANID_MASK;
5218 reg |= vid;
5219 wr32(E1000_VLVF(i), reg);
5220
5221 /* do not modify RLPML for PF devices */
5222 if (vf >= adapter->vfs_allocated_count)
5223 return 0;
5224
5225 if (!adapter->vf_data[vf].vlans_enabled) {
5226 u32 size;
5227 reg = rd32(E1000_VMOLR(vf));
5228 size = reg & E1000_VMOLR_RLPML_MASK;
5229 size += 4;
5230 reg &= ~E1000_VMOLR_RLPML_MASK;
5231 reg |= size;
5232 wr32(E1000_VMOLR(vf), reg);
5233 }
5234
5235 adapter->vf_data[vf].vlans_enabled++;
5236 }
5237 } else {
5238 if (i < E1000_VLVF_ARRAY_SIZE) {
5239 /* remove vf from the pool */
5240 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5241 /* if pool is empty then remove entry from vfta */
5242 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5243 reg = 0;
5244 igb_vfta_set(hw, vid, false);
5245 }
5246 wr32(E1000_VLVF(i), reg);
5247
5248 /* do not modify RLPML for PF devices */
5249 if (vf >= adapter->vfs_allocated_count)
5250 return 0;
5251
5252 adapter->vf_data[vf].vlans_enabled--;
5253 if (!adapter->vf_data[vf].vlans_enabled) {
5254 u32 size;
5255 reg = rd32(E1000_VMOLR(vf));
5256 size = reg & E1000_VMOLR_RLPML_MASK;
5257 size -= 4;
5258 reg &= ~E1000_VMOLR_RLPML_MASK;
5259 reg |= size;
5260 wr32(E1000_VMOLR(vf), reg);
5261 }
5262 }
5263 }
5264 return 0;
5265 }
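/* Worked example (illustrative): the first VLAN enabled for a VF grows
 * that VF's VMOLR RLPML (max receive packet length) by 4 bytes to make
 * room for the tag; removing the VF's last VLAN shrinks it by 4 again,
 * as done above.
 */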
5266
5267 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5268 {
5269 struct e1000_hw *hw = &adapter->hw;
5270
5271 if (vid)
5272 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5273 else
5274 wr32(E1000_VMVIR(vf), 0);
5275 }
5276
5277 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5278 int vf, u16 vlan, u8 qos)
5279 {
5280 int err = 0;
5281 struct igb_adapter *adapter = netdev_priv(netdev);
5282
5283 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5284 return -EINVAL;
5285 if (vlan || qos) {
5286 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5287 if (err)
5288 goto out;
5289 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5290 igb_set_vmolr(adapter, vf, !vlan);
5291 adapter->vf_data[vf].pf_vlan = vlan;
5292 adapter->vf_data[vf].pf_qos = qos;
5293 dev_info(&adapter->pdev->dev,
5294 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5295 if (test_bit(__IGB_DOWN, &adapter->state)) {
5296 dev_warn(&adapter->pdev->dev,
5297 "The VF VLAN has been set,"
5298 " but the PF device is not up.\n");
5299 dev_warn(&adapter->pdev->dev,
5300 "Bring the PF device up before"
5301 " attempting to use the VF device.\n");
5302 }
5303 } else {
5304 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5305 false, vf);
5306 igb_set_vmvir(adapter, vlan, vf);
5307 igb_set_vmolr(adapter, vf, true);
5308 adapter->vf_data[vf].pf_vlan = 0;
5309 adapter->vf_data[vf].pf_qos = 0;
5310 }
5311 out:
5312 return err;
5313 }
5314
5315 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5316 {
5317 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5318 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5319
5320 return igb_vlvf_set(adapter, vid, add, vf);
5321 }
5322
5323 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5324 {
5325 /* clear flags - except flag that indicates PF has set the MAC */
5326 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5327 adapter->vf_data[vf].last_nack = jiffies;
5328
5329 /* reset offloads to defaults */
5330 igb_set_vmolr(adapter, vf, true);
5331
5332 /* reset vlans for device */
5333 igb_clear_vf_vfta(adapter, vf);
5334 if (adapter->vf_data[vf].pf_vlan)
5335 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5336 adapter->vf_data[vf].pf_vlan,
5337 adapter->vf_data[vf].pf_qos);
5338 else
5339 igb_clear_vf_vfta(adapter, vf);
5340
5341 /* reset multicast table array for vf */
5342 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5343
5344 /* Flush and reset the mta with the new values */
5345 igb_set_rx_mode(adapter->netdev);
5346 }
5347
5348 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5349 {
5350 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5351
5352 /* generate a new mac address as we were hotplug removed/added */
5353 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5354 random_ether_addr(vf_mac);
5355
5356 /* process remaining reset events */
5357 igb_vf_reset(adapter, vf);
5358 }
5359
5360 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5361 {
5362 struct e1000_hw *hw = &adapter->hw;
5363 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5364 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5365 u32 reg, msgbuf[3];
5366 u8 *addr = (u8 *)(&msgbuf[1]);
5367
5368 /* process all the same items cleared in a function level reset */
5369 igb_vf_reset(adapter, vf);
5370
5371 /* set vf mac address */
5372 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5373
5374 /* enable transmit and receive for vf */
5375 reg = rd32(E1000_VFTE);
5376 wr32(E1000_VFTE, reg | (1 << vf));
5377 reg = rd32(E1000_VFRE);
5378 wr32(E1000_VFRE, reg | (1 << vf));
5379
5380 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5381
5382 /* reply to reset with ack and vf mac address */
5383 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5384 memcpy(addr, vf_mac, 6);
5385 igb_write_mbx(hw, msgbuf, 3, vf);
5386 }
5387
5388 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5389 {
5390 /*
5391 * The VF MAC Address is stored in a packed array of bytes
5392 * starting at the second 32 bit word of the msg array
5393 */
5394 	unsigned char *addr = (unsigned char *)&msg[1];
5395 int err = -1;
5396
5397 if (is_valid_ether_addr(addr))
5398 err = igb_set_vf_mac(adapter, vf, addr);
5399
5400 return err;
5401 }
5402
5403 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5404 {
5405 struct e1000_hw *hw = &adapter->hw;
5406 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5407 u32 msg = E1000_VT_MSGTYPE_NACK;
5408
5409 /* if device isn't clear to send it shouldn't be reading either */
5410 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5411 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5412 igb_write_mbx(hw, &msg, 1, vf);
5413 vf_data->last_nack = jiffies;
5414 }
5415 }
5416
5417 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5418 {
5419 struct pci_dev *pdev = adapter->pdev;
5420 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5421 struct e1000_hw *hw = &adapter->hw;
5422 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5423 s32 retval;
5424
5425 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5426
5427 if (retval) {
5428 		/* if receive failed, revoke VF CTS status and restart init */
5429 dev_err(&pdev->dev, "Error receiving message from VF\n");
5430 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5431 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5432 return;
5433 goto out;
5434 }
5435
5436 /* this is a message we already processed, do nothing */
5437 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5438 return;
5439
5440 /*
5441 * until the vf completes a reset it should not be
5442 * allowed to start any configuration.
5443 */
5444
5445 if (msgbuf[0] == E1000_VF_RESET) {
5446 igb_vf_reset_msg(adapter, vf);
5447 return;
5448 }
5449
5450 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5451 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5452 return;
5453 retval = -1;
5454 goto out;
5455 }
5456
5457 switch ((msgbuf[0] & 0xFFFF)) {
5458 case E1000_VF_SET_MAC_ADDR:
5459 retval = -EINVAL;
5460 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5461 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5462 else
5463 dev_warn(&pdev->dev,
5464 "VF %d attempted to override administratively "
5465 "set MAC address\nReload the VF driver to "
5466 "resume operations\n", vf);
5467 break;
5468 case E1000_VF_SET_PROMISC:
5469 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5470 break;
5471 case E1000_VF_SET_MULTICAST:
5472 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5473 break;
5474 case E1000_VF_SET_LPE:
5475 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5476 break;
5477 case E1000_VF_SET_VLAN:
5478 retval = -1;
5479 if (vf_data->pf_vlan)
5480 dev_warn(&pdev->dev,
5481 "VF %d attempted to override administratively "
5482 "set VLAN tag\nReload the VF driver to "
5483 "resume operations\n", vf);
5484 else
5485 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5486 break;
5487 default:
5488 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5489 retval = -1;
5490 break;
5491 }
5492
5493 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5494 out:
5495 /* notify the VF of the results of what it sent us */
5496 if (retval)
5497 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5498 else
5499 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5500
5501 igb_write_mbx(hw, msgbuf, 1, vf);
5502 }
5503
5504 static void igb_msg_task(struct igb_adapter *adapter)
5505 {
5506 struct e1000_hw *hw = &adapter->hw;
5507 u32 vf;
5508
5509 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5510 /* process any reset requests */
5511 if (!igb_check_for_rst(hw, vf))
5512 igb_vf_reset_event(adapter, vf);
5513
5514 /* process any messages pending */
5515 if (!igb_check_for_msg(hw, vf))
5516 igb_rcv_msg_from_vf(adapter, vf);
5517
5518 /* process any acks */
5519 if (!igb_check_for_ack(hw, vf))
5520 igb_rcv_ack_from_vf(adapter, vf);
5521 }
5522 }
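/* Illustration (not part of the driver): a typical VF bring-up handled by
 * the routines above is: the VF posts E1000_VF_RESET, the PF runs
 * igb_vf_reset_msg() and replies with E1000_VT_MSGTYPE_ACK plus the VF MAC
 * address, and only then are requests such as E1000_VF_SET_MULTICAST
 * accepted and ACKed or NACKed through msgbuf[0].
 */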
5523
5524 /**
5525 * igb_set_uta - Set unicast filter table address
5526 * @adapter: board private structure
5527 *
5528 * The unicast table address is a register array of 32-bit registers.
5529  * The table is meant to be used in a way similar to how the MTA is used;
5530  * however, due to certain limitations in the hardware it is necessary to
5531  * set all of the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5532  * enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5533 **/
5534 static void igb_set_uta(struct igb_adapter *adapter)
5535 {
5536 struct e1000_hw *hw = &adapter->hw;
5537 int i;
5538
5539 /* The UTA table only exists on 82576 hardware and newer */
5540 if (hw->mac.type < e1000_82576)
5541 return;
5542
5543 /* we only need to do this if VMDq is enabled */
5544 if (!adapter->vfs_allocated_count)
5545 return;
5546
5547 for (i = 0; i < hw->mac.uta_reg_count; i++)
5548 array_wr32(E1000_UTA, i, ~0);
5549 }
5550
5551 /**
5552 * igb_intr_msi - Interrupt Handler
5553 * @irq: interrupt number
5554 * @data: pointer to a network interface device structure
5555 **/
5556 static irqreturn_t igb_intr_msi(int irq, void *data)
5557 {
5558 struct igb_adapter *adapter = data;
5559 struct igb_q_vector *q_vector = adapter->q_vector[0];
5560 struct e1000_hw *hw = &adapter->hw;
5561 /* read ICR disables interrupts using IAM */
5562 u32 icr = rd32(E1000_ICR);
5563
5564 igb_write_itr(q_vector);
5565
5566 if (icr & E1000_ICR_DRSTA)
5567 schedule_work(&adapter->reset_task);
5568
5569 if (icr & E1000_ICR_DOUTSYNC) {
5570 /* HW is reporting DMA is out of sync */
5571 adapter->stats.doosync++;
5572 }
5573
5574 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5575 hw->mac.get_link_status = 1;
5576 if (!test_bit(__IGB_DOWN, &adapter->state))
5577 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5578 }
5579
5580 napi_schedule(&q_vector->napi);
5581
5582 return IRQ_HANDLED;
5583 }
5584
5585 /**
5586 * igb_intr - Legacy Interrupt Handler
5587 * @irq: interrupt number
5588 * @data: pointer to a network interface device structure
5589 **/
5590 static irqreturn_t igb_intr(int irq, void *data)
5591 {
5592 struct igb_adapter *adapter = data;
5593 struct igb_q_vector *q_vector = adapter->q_vector[0];
5594 struct e1000_hw *hw = &adapter->hw;
5595 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5596 * need for the IMC write */
5597 u32 icr = rd32(E1000_ICR);
5598
5599 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5600 * not set, then the adapter didn't send an interrupt */
5601 if (!(icr & E1000_ICR_INT_ASSERTED))
5602 return IRQ_NONE;
5603
5604 igb_write_itr(q_vector);
5605
5606 if (icr & E1000_ICR_DRSTA)
5607 schedule_work(&adapter->reset_task);
5608
5609 if (icr & E1000_ICR_DOUTSYNC) {
5610 /* HW is reporting DMA is out of sync */
5611 adapter->stats.doosync++;
5612 }
5613
5614 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5615 hw->mac.get_link_status = 1;
5616 /* guard against interrupt when we're going down */
5617 if (!test_bit(__IGB_DOWN, &adapter->state))
5618 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5619 }
5620
5621 napi_schedule(&q_vector->napi);
5622
5623 return IRQ_HANDLED;
5624 }
5625
5626 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5627 {
5628 struct igb_adapter *adapter = q_vector->adapter;
5629 struct e1000_hw *hw = &adapter->hw;
5630
5631 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5632 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5633 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5634 igb_set_itr(q_vector);
5635 else
5636 igb_update_ring_itr(q_vector);
5637 }
5638
5639 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5640 if (adapter->msix_entries)
5641 wr32(E1000_EIMS, q_vector->eims_value);
5642 else
5643 igb_irq_enable(adapter);
5644 }
5645 }
5646
5647 /**
5648 * igb_poll - NAPI Rx polling callback
5649 * @napi: napi polling structure
5650 * @budget: count of how many packets we should handle
5651 **/
5652 static int igb_poll(struct napi_struct *napi, int budget)
5653 {
5654 struct igb_q_vector *q_vector = container_of(napi,
5655 struct igb_q_vector,
5656 napi);
5657 bool clean_complete = true;
5658
5659 #ifdef CONFIG_IGB_DCA
5660 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5661 igb_update_dca(q_vector);
5662 #endif
5663 if (q_vector->tx.ring)
5664 clean_complete = igb_clean_tx_irq(q_vector);
5665
5666 if (q_vector->rx.ring)
5667 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5668
5669 /* If all work not completed, return budget and keep polling */
5670 if (!clean_complete)
5671 return budget;
5672
5673 /* If not enough Rx work done, exit the polling mode */
5674 napi_complete(napi);
5675 igb_ring_irq_enable(q_vector);
5676
5677 return 0;
5678 }
5679
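/*
 * Note on the NAPI contract implemented above: returning the full budget
 * tells the core that work remains and it should poll again with device
 * interrupts still disabled; returning less than budget (0 here) after
 * napi_complete() re-arms interrupts through igb_ring_irq_enable().
 */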
5680 #ifdef CONFIG_IGB_PTP
5681 /**
5682 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5683 * @q_vector: pointer to q_vector containing needed info
5684 * @buffer: pointer to igb_tx_buffer structure
5685 *
5686 * If we were asked to do hardware stamping and such a time stamp is
5687 * available, then it must have been for this skb here because we only
5688 * allow only one such packet into the queue.
5689 */
5690 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5691 struct igb_tx_buffer *buffer_info)
5692 {
5693 struct igb_adapter *adapter = q_vector->adapter;
5694 struct e1000_hw *hw = &adapter->hw;
5695 struct skb_shared_hwtstamps shhwtstamps;
5696 u64 regval;
5697
5698 /* if skb does not support hw timestamp or TX stamp not valid exit */
5699 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5700 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5701 return;
5702
5703 regval = rd32(E1000_TXSTMPL);
5704 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5705
5706 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5707 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5708 }
5709
5710 #endif
5711 /**
5712 * igb_clean_tx_irq - Reclaim resources after transmit completes
5713 * @q_vector: pointer to q_vector containing needed info
5714 * returns true if ring is completely cleaned
5715 **/
5716 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5717 {
5718 struct igb_adapter *adapter = q_vector->adapter;
5719 struct igb_ring *tx_ring = q_vector->tx.ring;
5720 struct igb_tx_buffer *tx_buffer;
5721 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5722 unsigned int total_bytes = 0, total_packets = 0;
5723 unsigned int budget = q_vector->tx.work_limit;
5724 unsigned int i = tx_ring->next_to_clean;
5725
5726 if (test_bit(__IGB_DOWN, &adapter->state))
5727 return true;
5728
5729 tx_buffer = &tx_ring->tx_buffer_info[i];
5730 tx_desc = IGB_TX_DESC(tx_ring, i);
5731 i -= tx_ring->count;
5732
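	/*
	 * The index is tracked as a negative offset from the end of the
	 * ring (i -= tx_ring->count above): it reaches zero exactly when
	 * the clean pointer wraps, so the wrap test below is simply !i.
	 * E.g. with count = 512 and next_to_clean = 5, i starts at -507
	 * and wraps after 507 descriptors.
	 */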
5733 for (; budget; budget--) {
5734 eop_desc = tx_buffer->next_to_watch;
5735
5736 /* prevent any other reads prior to eop_desc */
5737 rmb();
5738
5739 /* if next_to_watch is not set then there is no work pending */
5740 if (!eop_desc)
5741 break;
5742
5743 /* if DD is not set pending work has not been completed */
5744 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5745 break;
5746
5747 /* clear next_to_watch to prevent false hangs */
5748 tx_buffer->next_to_watch = NULL;
5749
5750 /* update the statistics for this packet */
5751 total_bytes += tx_buffer->bytecount;
5752 total_packets += tx_buffer->gso_segs;
5753
5754 #ifdef CONFIG_IGB_PTP
5755 /* retrieve hardware timestamp */
5756 igb_tx_hwtstamp(q_vector, tx_buffer);
5757
5758 #endif
5759 /* free the skb */
5760 dev_kfree_skb_any(tx_buffer->skb);
5761 tx_buffer->skb = NULL;
5762
5763 /* unmap skb header data */
5764 dma_unmap_single(tx_ring->dev,
5765 tx_buffer->dma,
5766 tx_buffer->length,
5767 DMA_TO_DEVICE);
5768
5769 /* clear last DMA location and unmap remaining buffers */
5770 while (tx_desc != eop_desc) {
5771 tx_buffer->dma = 0;
5772
5773 tx_buffer++;
5774 tx_desc++;
5775 i++;
5776 if (unlikely(!i)) {
5777 i -= tx_ring->count;
5778 tx_buffer = tx_ring->tx_buffer_info;
5779 tx_desc = IGB_TX_DESC(tx_ring, 0);
5780 }
5781
5782 /* unmap any remaining paged data */
5783 if (tx_buffer->dma) {
5784 dma_unmap_page(tx_ring->dev,
5785 tx_buffer->dma,
5786 tx_buffer->length,
5787 DMA_TO_DEVICE);
5788 }
5789 }
5790
5791 /* clear last DMA location */
5792 tx_buffer->dma = 0;
5793
5794 /* move us one more past the eop_desc for start of next pkt */
5795 tx_buffer++;
5796 tx_desc++;
5797 i++;
5798 if (unlikely(!i)) {
5799 i -= tx_ring->count;
5800 tx_buffer = tx_ring->tx_buffer_info;
5801 tx_desc = IGB_TX_DESC(tx_ring, 0);
5802 }
5803 }
5804
5805 netdev_tx_completed_queue(txring_txq(tx_ring),
5806 total_packets, total_bytes);
5807 i += tx_ring->count;
5808 tx_ring->next_to_clean = i;
5809 u64_stats_update_begin(&tx_ring->tx_syncp);
5810 tx_ring->tx_stats.bytes += total_bytes;
5811 tx_ring->tx_stats.packets += total_packets;
5812 u64_stats_update_end(&tx_ring->tx_syncp);
5813 q_vector->tx.total_bytes += total_bytes;
5814 q_vector->tx.total_packets += total_packets;
5815
5816 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5817 struct e1000_hw *hw = &adapter->hw;
5818
5819 eop_desc = tx_buffer->next_to_watch;
5820
5821 		/* Detect a transmit hang in hardware; this serializes the
5822 * check with the clearing of time_stamp and movement of i */
5823 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5824 if (eop_desc &&
5825 time_after(jiffies, tx_buffer->time_stamp +
5826 (adapter->tx_timeout_factor * HZ)) &&
5827 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5828
5829 /* detected Tx unit hang */
5830 dev_err(tx_ring->dev,
5831 "Detected Tx Unit Hang\n"
5832 " Tx Queue <%d>\n"
5833 " TDH <%x>\n"
5834 " TDT <%x>\n"
5835 " next_to_use <%x>\n"
5836 " next_to_clean <%x>\n"
5837 "buffer_info[next_to_clean]\n"
5838 " time_stamp <%lx>\n"
5839 " next_to_watch <%p>\n"
5840 " jiffies <%lx>\n"
5841 " desc.status <%x>\n",
5842 tx_ring->queue_index,
5843 rd32(E1000_TDH(tx_ring->reg_idx)),
5844 readl(tx_ring->tail),
5845 tx_ring->next_to_use,
5846 tx_ring->next_to_clean,
5847 tx_buffer->time_stamp,
5848 eop_desc,
5849 jiffies,
5850 eop_desc->wb.status);
5851 netif_stop_subqueue(tx_ring->netdev,
5852 tx_ring->queue_index);
5853
5854 /* we are about to reset, no point in enabling stuff */
5855 return true;
5856 }
5857 }
5858
5859 if (unlikely(total_packets &&
5860 netif_carrier_ok(tx_ring->netdev) &&
5861 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5862 /* Make sure that anybody stopping the queue after this
5863 * sees the new next_to_clean.
5864 */
5865 smp_mb();
5866 if (__netif_subqueue_stopped(tx_ring->netdev,
5867 tx_ring->queue_index) &&
5868 !(test_bit(__IGB_DOWN, &adapter->state))) {
5869 netif_wake_subqueue(tx_ring->netdev,
5870 tx_ring->queue_index);
5871
5872 u64_stats_update_begin(&tx_ring->tx_syncp);
5873 tx_ring->tx_stats.restart_queue++;
5874 u64_stats_update_end(&tx_ring->tx_syncp);
5875 }
5876 }
5877
5878 return !!budget;
5879 }
5880
5881 static inline void igb_rx_checksum(struct igb_ring *ring,
5882 union e1000_adv_rx_desc *rx_desc,
5883 struct sk_buff *skb)
5884 {
5885 skb_checksum_none_assert(skb);
5886
5887 /* Ignore Checksum bit is set */
5888 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5889 return;
5890
5891 /* Rx checksum disabled via ethtool */
5892 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5893 return;
5894
5895 /* TCP/UDP checksum error bit is set */
5896 if (igb_test_staterr(rx_desc,
5897 E1000_RXDEXT_STATERR_TCPE |
5898 E1000_RXDEXT_STATERR_IPE)) {
5899 /*
5900 * work around errata with sctp packets where the TCPE aka
5901 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5902 * packets, (aka let the stack check the crc32c)
5903 */
5904 if (!((skb->len == 60) &&
5905 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5906 u64_stats_update_begin(&ring->rx_syncp);
5907 ring->rx_stats.csum_err++;
5908 u64_stats_update_end(&ring->rx_syncp);
5909 }
5910 /* let the stack verify checksum errors */
5911 return;
5912 }
5913 /* It must be a TCP or UDP packet with a valid checksum */
5914 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5915 E1000_RXD_STAT_UDPCS))
5916 skb->ip_summed = CHECKSUM_UNNECESSARY;
5917
5918 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5919 le32_to_cpu(rx_desc->wb.upper.status_error));
5920 }
5921
5922 static inline void igb_rx_hash(struct igb_ring *ring,
5923 union e1000_adv_rx_desc *rx_desc,
5924 struct sk_buff *skb)
5925 {
5926 if (ring->netdev->features & NETIF_F_RXHASH)
5927 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5928 }
5929
5930 #ifdef CONFIG_IGB_PTP
5931 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5932 union e1000_adv_rx_desc *rx_desc,
5933 struct sk_buff *skb)
5934 {
5935 struct igb_adapter *adapter = q_vector->adapter;
5936 struct e1000_hw *hw = &adapter->hw;
5937 u64 regval;
5938
5939 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5940 E1000_RXDADV_STAT_TS))
5941 return;
5942
5943 /*
5944 * If this bit is set, then the RX registers contain the time stamp. No
5945 * other packet will be time stamped until we read these registers, so
5946 * read the registers to make them available again. Because only one
5947 * packet can be time stamped at a time, we know that the register
5948 * values must belong to this one here and therefore we don't need to
5949 * compare any of the additional attributes stored for it.
5950 *
5951 * If nothing went wrong, then it should have a shared tx_flags that we
5952 * can turn into a skb_shared_hwtstamps.
5953 */
5954 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5955 u32 *stamp = (u32 *)skb->data;
5956 regval = le32_to_cpu(*(stamp + 2));
5957 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5958 skb_pull(skb, IGB_TS_HDR_LEN);
5959 } else {
5960 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5961 return;
5962
5963 regval = rd32(E1000_RXSTMPL);
5964 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5965 }
5966
5967 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5968 }
5969
5970 #endif
5971 static void igb_rx_vlan(struct igb_ring *ring,
5972 union e1000_adv_rx_desc *rx_desc,
5973 struct sk_buff *skb)
5974 {
5975 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5976 u16 vid;
5977 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5978 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5979 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5980 else
5981 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5982
5983 __vlan_hwaccel_put_tag(skb, vid);
5984 }
5985 }
5986
5987 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5988 {
5989 /* HW will not DMA in data larger than the given buffer, even if it
5990 * parses the (NFS, of course) header to be larger. In that case, it
5991 * fills the header buffer and spills the rest into the page.
5992 */
5993 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5994 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5995 if (hlen > IGB_RX_HDR_LEN)
5996 hlen = IGB_RX_HDR_LEN;
5997 return hlen;
5998 }
5999
6000 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6001 {
6002 struct igb_ring *rx_ring = q_vector->rx.ring;
6003 union e1000_adv_rx_desc *rx_desc;
6004 const int current_node = numa_node_id();
6005 unsigned int total_bytes = 0, total_packets = 0;
6006 u16 cleaned_count = igb_desc_unused(rx_ring);
6007 u16 i = rx_ring->next_to_clean;
6008
6009 rx_desc = IGB_RX_DESC(rx_ring, i);
6010
6011 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6012 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6013 struct sk_buff *skb = buffer_info->skb;
6014 union e1000_adv_rx_desc *next_rxd;
6015
6016 buffer_info->skb = NULL;
6017 prefetch(skb->data);
6018
6019 i++;
6020 if (i == rx_ring->count)
6021 i = 0;
6022
6023 next_rxd = IGB_RX_DESC(rx_ring, i);
6024 prefetch(next_rxd);
6025
6026 /*
6027 * This memory barrier is needed to keep us from reading
6028 * any other fields out of the rx_desc until we know the
6029 * RXD_STAT_DD bit is set
6030 */
6031 rmb();
6032
6033 if (!skb_is_nonlinear(skb)) {
6034 __skb_put(skb, igb_get_hlen(rx_desc));
6035 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6036 IGB_RX_HDR_LEN,
6037 DMA_FROM_DEVICE);
6038 buffer_info->dma = 0;
6039 }
6040
6041 if (rx_desc->wb.upper.length) {
6042 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6043
6044 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6045 buffer_info->page,
6046 buffer_info->page_offset,
6047 length);
6048
6049 skb->len += length;
6050 skb->data_len += length;
6051 skb->truesize += PAGE_SIZE / 2;
6052
6053 if ((page_count(buffer_info->page) != 1) ||
6054 (page_to_nid(buffer_info->page) != current_node))
6055 buffer_info->page = NULL;
6056 else
6057 get_page(buffer_info->page);
6058
6059 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6060 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6061 buffer_info->page_dma = 0;
6062 }
6063
6064 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6065 struct igb_rx_buffer *next_buffer;
6066 next_buffer = &rx_ring->rx_buffer_info[i];
6067 buffer_info->skb = next_buffer->skb;
6068 buffer_info->dma = next_buffer->dma;
6069 next_buffer->skb = skb;
6070 next_buffer->dma = 0;
6071 goto next_desc;
6072 }
6073
6074 if (unlikely((igb_test_staterr(rx_desc,
6075 E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6076 && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6077 dev_kfree_skb_any(skb);
6078 goto next_desc;
6079 }
6080
6081 #ifdef CONFIG_IGB_PTP
6082 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6083 #endif
6084 igb_rx_hash(rx_ring, rx_desc, skb);
6085 igb_rx_checksum(rx_ring, rx_desc, skb);
6086 igb_rx_vlan(rx_ring, rx_desc, skb);
6087
6088 total_bytes += skb->len;
6089 total_packets++;
6090
6091 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6092
6093 napi_gro_receive(&q_vector->napi, skb);
6094
6095 budget--;
6096 next_desc:
6097 if (!budget)
6098 break;
6099
6100 cleaned_count++;
6101 /* return some buffers to hardware, one at a time is too slow */
6102 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6103 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6104 cleaned_count = 0;
6105 }
6106
6107 /* use prefetched values */
6108 rx_desc = next_rxd;
6109 }
6110
6111 rx_ring->next_to_clean = i;
6112 u64_stats_update_begin(&rx_ring->rx_syncp);
6113 rx_ring->rx_stats.packets += total_packets;
6114 rx_ring->rx_stats.bytes += total_bytes;
6115 u64_stats_update_end(&rx_ring->rx_syncp);
6116 q_vector->rx.total_packets += total_packets;
6117 q_vector->rx.total_bytes += total_bytes;
6118
6119 if (cleaned_count)
6120 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6121
6122 return !!budget;
6123 }
6124
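/*
 * Receive path summary for the function above: the hardware header-splits
 * each packet, so up to IGB_RX_HDR_LEN bytes land in the skb's linear area
 * while any remaining data is attached as a half-page fragment; descriptors
 * without EOP set hand their skb on to the next buffer until the final
 * fragment of the packet arrives.
 */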
6125 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6126 struct igb_rx_buffer *bi)
6127 {
6128 struct sk_buff *skb = bi->skb;
6129 dma_addr_t dma = bi->dma;
6130
6131 if (dma)
6132 return true;
6133
6134 if (likely(!skb)) {
6135 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6136 IGB_RX_HDR_LEN);
6137 bi->skb = skb;
6138 if (!skb) {
6139 rx_ring->rx_stats.alloc_failed++;
6140 return false;
6141 }
6142
6143 /* initialize skb for ring */
6144 skb_record_rx_queue(skb, rx_ring->queue_index);
6145 }
6146
6147 dma = dma_map_single(rx_ring->dev, skb->data,
6148 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6149
6150 if (dma_mapping_error(rx_ring->dev, dma)) {
6151 rx_ring->rx_stats.alloc_failed++;
6152 return false;
6153 }
6154
6155 bi->dma = dma;
6156 return true;
6157 }
6158
6159 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6160 struct igb_rx_buffer *bi)
6161 {
6162 struct page *page = bi->page;
6163 dma_addr_t page_dma = bi->page_dma;
6164 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6165
6166 if (page_dma)
6167 return true;
6168
6169 if (!page) {
6170 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6171 bi->page = page;
6172 if (unlikely(!page)) {
6173 rx_ring->rx_stats.alloc_failed++;
6174 return false;
6175 }
6176 }
6177
6178 page_dma = dma_map_page(rx_ring->dev, page,
6179 page_offset, PAGE_SIZE / 2,
6180 DMA_FROM_DEVICE);
6181
6182 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6183 rx_ring->rx_stats.alloc_failed++;
6184 return false;
6185 }
6186
6187 bi->page_dma = page_dma;
6188 bi->page_offset = page_offset;
6189 return true;
6190 }
6191
6192 /**
6193 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6194  * @rx_ring: rx descriptor ring to allocate new receive buffers for
 * @cleaned_count: number of buffers to allocate
6195 **/
6196 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6197 {
6198 union e1000_adv_rx_desc *rx_desc;
6199 struct igb_rx_buffer *bi;
6200 u16 i = rx_ring->next_to_use;
6201
6202 rx_desc = IGB_RX_DESC(rx_ring, i);
6203 bi = &rx_ring->rx_buffer_info[i];
6204 i -= rx_ring->count;
6205
6206 while (cleaned_count--) {
6207 if (!igb_alloc_mapped_skb(rx_ring, bi))
6208 break;
6209
6210 /* Refresh the desc even if buffer_addrs didn't change
6211 * because each write-back erases this info. */
6212 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6213
6214 if (!igb_alloc_mapped_page(rx_ring, bi))
6215 break;
6216
6217 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6218
6219 rx_desc++;
6220 bi++;
6221 i++;
6222 if (unlikely(!i)) {
6223 rx_desc = IGB_RX_DESC(rx_ring, 0);
6224 bi = rx_ring->rx_buffer_info;
6225 i -= rx_ring->count;
6226 }
6227
6228 /* clear the hdr_addr for the next_to_use descriptor */
6229 rx_desc->read.hdr_addr = 0;
6230 }
6231
6232 i += rx_ring->count;
6233
6234 if (rx_ring->next_to_use != i) {
6235 rx_ring->next_to_use = i;
6236
6237 /* Force memory writes to complete before letting h/w
6238 * know there are new descriptors to fetch. (Only
6239 * applicable for weak-ordered memory model archs,
6240 * such as IA-64). */
6241 wmb();
6242 writel(i, rx_ring->tail);
6243 }
6244 }
6245
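/*
 * igb_alloc_rx_buffers() is normally driven from the clean path:
 * igb_clean_rx_irq() starts from igb_desc_unused(rx_ring) and replenishes
 * in batches of IGB_RX_BUFFER_WRITE so the tail register is not written
 * for every single descriptor.
 */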
6246 /**
6247  * igb_mii_ioctl - handle MII ioctls used for PHY register access
6248  * @netdev: network interface device structure
6249  * @ifr: interface request structure holding the MII data
6250  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6251 **/
6252 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6253 {
6254 struct igb_adapter *adapter = netdev_priv(netdev);
6255 struct mii_ioctl_data *data = if_mii(ifr);
6256
6257 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6258 return -EOPNOTSUPP;
6259
6260 switch (cmd) {
6261 case SIOCGMIIPHY:
6262 data->phy_id = adapter->hw.phy.addr;
6263 break;
6264 case SIOCGMIIREG:
6265 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6266 &data->val_out))
6267 return -EIO;
6268 break;
6269 case SIOCSMIIREG:
6270 default:
6271 return -EOPNOTSUPP;
6272 }
6273 return 0;
6274 }
6275
6276 /**
6277 * igb_hwtstamp_ioctl - control hardware time stamping
6278  * @netdev: network interface device structure
6279  * @ifr: interface request structure holding a struct hwtstamp_config
6280  * @cmd: ioctl command (SIOCSHWTSTAMP)
6281 *
6282 * Outgoing time stamping can be enabled and disabled. Play nice and
6283  * disable it when requested, although it shouldn't cause any overhead
6284 * when no packet needs it. At most one packet in the queue may be
6285 * marked for time stamping, otherwise it would be impossible to tell
6286 * for sure to which packet the hardware time stamp belongs.
6287 *
6288 * Incoming time stamping has to be configured via the hardware
6289 * filters. Not all combinations are supported, in particular event
6290 * type has to be specified. Matching the kind of event packet is
6291 * not supported, with the exception of "all V2 events regardless of
6292 * level 2 or 4".
6293 *
6294 **/
6295 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6296 struct ifreq *ifr, int cmd)
6297 {
6298 struct igb_adapter *adapter = netdev_priv(netdev);
6299 struct e1000_hw *hw = &adapter->hw;
6300 struct hwtstamp_config config;
6301 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6302 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6303 u32 tsync_rx_cfg = 0;
6304 bool is_l4 = false;
6305 bool is_l2 = false;
6306 u32 regval;
6307
6308 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6309 return -EFAULT;
6310
6311 /* reserved for future extensions */
6312 if (config.flags)
6313 return -EINVAL;
6314
6315 switch (config.tx_type) {
6316 case HWTSTAMP_TX_OFF:
6317 tsync_tx_ctl = 0;
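		/* fall through - HWTSTAMP_TX_OFF only clears the enable bit */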
6318 case HWTSTAMP_TX_ON:
6319 break;
6320 default:
6321 return -ERANGE;
6322 }
6323
6324 switch (config.rx_filter) {
6325 case HWTSTAMP_FILTER_NONE:
6326 tsync_rx_ctl = 0;
6327 break;
6328 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6329 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6330 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6331 case HWTSTAMP_FILTER_ALL:
6332 /*
6333 * register TSYNCRXCFG must be set, therefore it is not
6334 * possible to time stamp both Sync and Delay_Req messages
6335 * => fall back to time stamping all packets
6336 */
6337 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6338 config.rx_filter = HWTSTAMP_FILTER_ALL;
6339 break;
6340 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6341 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6342 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6343 is_l4 = true;
6344 break;
6345 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6346 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6347 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6348 is_l4 = true;
6349 break;
6350 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6351 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6352 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6353 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6354 is_l2 = true;
6355 is_l4 = true;
6356 config.rx_filter = HWTSTAMP_FILTER_SOME;
6357 break;
6358 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6359 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6360 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6361 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6362 is_l2 = true;
6363 is_l4 = true;
6364 config.rx_filter = HWTSTAMP_FILTER_SOME;
6365 break;
6366 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6367 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6368 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6369 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6370 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6371 is_l2 = true;
6372 is_l4 = true;
6373 break;
6374 default:
6375 return -ERANGE;
6376 }
6377
6378 if (hw->mac.type == e1000_82575) {
6379 if (tsync_rx_ctl | tsync_tx_ctl)
6380 return -EINVAL;
6381 return 0;
6382 }
6383
6384 /*
6385 * Per-packet timestamping only works if all packets are
6386 * timestamped, so enable timestamping in all packets as
6387 * long as one rx filter was configured.
6388 */
6389 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6390 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6391 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6392 }
6393
6394 /* enable/disable TX */
6395 regval = rd32(E1000_TSYNCTXCTL);
6396 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6397 regval |= tsync_tx_ctl;
6398 wr32(E1000_TSYNCTXCTL, regval);
6399
6400 /* enable/disable RX */
6401 regval = rd32(E1000_TSYNCRXCTL);
6402 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6403 regval |= tsync_rx_ctl;
6404 wr32(E1000_TSYNCRXCTL, regval);
6405
6406 /* define which PTP packets are time stamped */
6407 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6408
6409 /* define ethertype filter for timestamped packets */
6410 if (is_l2)
6411 wr32(E1000_ETQF(3),
6412 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6413 E1000_ETQF_1588 | /* enable timestamping */
6414 ETH_P_1588)); /* 1588 eth protocol type */
6415 else
6416 wr32(E1000_ETQF(3), 0);
6417
6418 #define PTP_PORT 319
6419 /* L4 Queue Filter[3]: filter by destination port and protocol */
6420 if (is_l4) {
6421 u32 ftqf = (IPPROTO_UDP /* UDP */
6422 | E1000_FTQF_VF_BP /* VF not compared */
6423 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6424 | E1000_FTQF_MASK); /* mask all inputs */
6425 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6426
6427 wr32(E1000_IMIR(3), htons(PTP_PORT));
6428 wr32(E1000_IMIREXT(3),
6429 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6430 if (hw->mac.type == e1000_82576) {
6431 /* enable source port check */
6432 wr32(E1000_SPQF(3), htons(PTP_PORT));
6433 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6434 }
6435 wr32(E1000_FTQF(3), ftqf);
6436 } else {
6437 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6438 }
6439 wrfl();
6440
6441 adapter->hwtstamp_config = config;
6442
6443 /* clear TX/RX time stamp registers, just to be sure */
6444 regval = rd32(E1000_TXSTMPH);
6445 regval = rd32(E1000_RXSTMPH);
6446
6447 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6448 -EFAULT : 0;
6449 }
6450
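/*
 * Illustrative sketch (not part of the driver): how a userspace tool might
 * exercise the SIOCSHWTSTAMP path handled above.  The helper name, the
 * interface name "eth0" and the choice of filter are assumptions for the
 * example only; sock_fd is any open socket, e.g. AF_INET/SOCK_DGRAM.
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <linux/sockios.h>
 *	#include <linux/net_tstamp.h>
 *
 *	static int enable_hwtstamp(int sock_fd)
 *	{
 *		struct hwtstamp_config cfg;
 *		struct ifreq ifr;
 *
 *		memset(&cfg, 0, sizeof(cfg));
 *		memset(&ifr, 0, sizeof(ifr));
 *		cfg.tx_type = HWTSTAMP_TX_ON;
 *		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		ifr.ifr_data = (void *)&cfg;
 *
 *		return ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);
 *	}
 */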
6451 /**
6452  * igb_ioctl - dispatch device ioctls to the appropriate handler
6453  * @netdev: network interface device structure
6454  * @ifr: interface request structure
6455  * @cmd: ioctl command
6456 **/
6457 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6458 {
6459 switch (cmd) {
6460 case SIOCGMIIPHY:
6461 case SIOCGMIIREG:
6462 case SIOCSMIIREG:
6463 return igb_mii_ioctl(netdev, ifr, cmd);
6464 case SIOCSHWTSTAMP:
6465 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6466 default:
6467 return -EOPNOTSUPP;
6468 }
6469 }
6470
6471 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6472 {
6473 struct igb_adapter *adapter = hw->back;
6474 u16 cap_offset;
6475
6476 cap_offset = adapter->pdev->pcie_cap;
6477 if (!cap_offset)
6478 return -E1000_ERR_CONFIG;
6479
6480 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6481
6482 return 0;
6483 }
6484
6485 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6486 {
6487 struct igb_adapter *adapter = hw->back;
6488 u16 cap_offset;
6489
6490 cap_offset = adapter->pdev->pcie_cap;
6491 if (!cap_offset)
6492 return -E1000_ERR_CONFIG;
6493
6494 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6495
6496 return 0;
6497 }
6498
6499 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6500 {
6501 struct igb_adapter *adapter = netdev_priv(netdev);
6502 struct e1000_hw *hw = &adapter->hw;
6503 u32 ctrl, rctl;
6504 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6505
6506 if (enable) {
6507 /* enable VLAN tag insert/strip */
6508 ctrl = rd32(E1000_CTRL);
6509 ctrl |= E1000_CTRL_VME;
6510 wr32(E1000_CTRL, ctrl);
6511
6512 /* Disable CFI check */
6513 rctl = rd32(E1000_RCTL);
6514 rctl &= ~E1000_RCTL_CFIEN;
6515 wr32(E1000_RCTL, rctl);
6516 } else {
6517 /* disable VLAN tag insert/strip */
6518 ctrl = rd32(E1000_CTRL);
6519 ctrl &= ~E1000_CTRL_VME;
6520 wr32(E1000_CTRL, ctrl);
6521 }
6522
6523 igb_rlpml_set(adapter);
6524 }
6525
6526 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6527 {
6528 struct igb_adapter *adapter = netdev_priv(netdev);
6529 struct e1000_hw *hw = &adapter->hw;
6530 int pf_id = adapter->vfs_allocated_count;
6531
6532 /* attempt to add filter to vlvf array */
6533 igb_vlvf_set(adapter, vid, true, pf_id);
6534
6535 /* add the filter since PF can receive vlans w/o entry in vlvf */
6536 igb_vfta_set(hw, vid, true);
6537
6538 set_bit(vid, adapter->active_vlans);
6539
6540 return 0;
6541 }
6542
6543 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6544 {
6545 struct igb_adapter *adapter = netdev_priv(netdev);
6546 struct e1000_hw *hw = &adapter->hw;
6547 int pf_id = adapter->vfs_allocated_count;
6548 s32 err;
6549
6550 /* remove vlan from VLVF table array */
6551 err = igb_vlvf_set(adapter, vid, false, pf_id);
6552
6553 /* if vid was not present in VLVF just remove it from table */
6554 if (err)
6555 igb_vfta_set(hw, vid, false);
6556
6557 clear_bit(vid, adapter->active_vlans);
6558
6559 return 0;
6560 }
6561
6562 static void igb_restore_vlan(struct igb_adapter *adapter)
6563 {
6564 u16 vid;
6565
6566 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6567
6568 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6569 igb_vlan_rx_add_vid(adapter->netdev, vid);
6570 }
6571
6572 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6573 {
6574 struct pci_dev *pdev = adapter->pdev;
6575 struct e1000_mac_info *mac = &adapter->hw.mac;
6576
6577 mac->autoneg = 0;
6578
6579 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6580 * for the switch() below to work */
6581 if ((spd & 1) || (dplx & ~1))
6582 goto err_inval;
6583
6584 	/* Fiber NICs only allow 1000 Mbps full duplex */
6585 	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6586 	    (spd != SPEED_1000 ||
6587 	     dplx != DUPLEX_FULL))
6588 		goto err_inval;
6589
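	/*
	 * spd + dplx identifies each combination uniquely because the
	 * ethtool SPEED_* values (10/100/1000) are spaced far wider apart
	 * than DUPLEX_HALF/DUPLEX_FULL (0/1); e.g. SPEED_100 + DUPLEX_FULL
	 * evaluates to 101 and cannot collide with any other pair.
	 */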
6590 switch (spd + dplx) {
6591 case SPEED_10 + DUPLEX_HALF:
6592 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6593 break;
6594 case SPEED_10 + DUPLEX_FULL:
6595 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6596 break;
6597 case SPEED_100 + DUPLEX_HALF:
6598 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6599 break;
6600 case SPEED_100 + DUPLEX_FULL:
6601 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6602 break;
6603 case SPEED_1000 + DUPLEX_FULL:
6604 mac->autoneg = 1;
6605 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6606 break;
6607 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6608 default:
6609 goto err_inval;
6610 }
6611 return 0;
6612
6613 err_inval:
6614 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6615 return -EINVAL;
6616 }
6617
6618 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6619 bool runtime)
6620 {
6621 struct net_device *netdev = pci_get_drvdata(pdev);
6622 struct igb_adapter *adapter = netdev_priv(netdev);
6623 struct e1000_hw *hw = &adapter->hw;
6624 u32 ctrl, rctl, status;
6625 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6626 #ifdef CONFIG_PM
6627 int retval = 0;
6628 #endif
6629
6630 netif_device_detach(netdev);
6631
6632 if (netif_running(netdev))
6633 __igb_close(netdev, true);
6634
6635 igb_clear_interrupt_scheme(adapter);
6636
6637 #ifdef CONFIG_PM
6638 retval = pci_save_state(pdev);
6639 if (retval)
6640 return retval;
6641 #endif
6642
6643 status = rd32(E1000_STATUS);
6644 if (status & E1000_STATUS_LU)
6645 wufc &= ~E1000_WUFC_LNKC;
6646
6647 if (wufc) {
6648 igb_setup_rctl(adapter);
6649 igb_set_rx_mode(netdev);
6650
6651 /* turn on all-multi mode if wake on multicast is enabled */
6652 if (wufc & E1000_WUFC_MC) {
6653 rctl = rd32(E1000_RCTL);
6654 rctl |= E1000_RCTL_MPE;
6655 wr32(E1000_RCTL, rctl);
6656 }
6657
6658 ctrl = rd32(E1000_CTRL);
6659 /* advertise wake from D3Cold */
6660 #define E1000_CTRL_ADVD3WUC 0x00100000
6661 /* phy power management enable */
6662 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6663 ctrl |= E1000_CTRL_ADVD3WUC;
6664 wr32(E1000_CTRL, ctrl);
6665
6666 /* Allow time for pending master requests to run */
6667 igb_disable_pcie_master(hw);
6668
6669 wr32(E1000_WUC, E1000_WUC_PME_EN);
6670 wr32(E1000_WUFC, wufc);
6671 } else {
6672 wr32(E1000_WUC, 0);
6673 wr32(E1000_WUFC, 0);
6674 }
6675
6676 *enable_wake = wufc || adapter->en_mng_pt;
6677 if (!*enable_wake)
6678 igb_power_down_link(adapter);
6679 else
6680 igb_power_up_link(adapter);
6681
6682 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6683 * would have already happened in close and is redundant. */
6684 igb_release_hw_control(adapter);
6685
6686 pci_disable_device(pdev);
6687
6688 return 0;
6689 }
6690
6691 #ifdef CONFIG_PM
6692 #ifdef CONFIG_PM_SLEEP
6693 static int igb_suspend(struct device *dev)
6694 {
6695 int retval;
6696 bool wake;
6697 struct pci_dev *pdev = to_pci_dev(dev);
6698
6699 retval = __igb_shutdown(pdev, &wake, 0);
6700 if (retval)
6701 return retval;
6702
6703 if (wake) {
6704 pci_prepare_to_sleep(pdev);
6705 } else {
6706 pci_wake_from_d3(pdev, false);
6707 pci_set_power_state(pdev, PCI_D3hot);
6708 }
6709
6710 return 0;
6711 }
6712 #endif /* CONFIG_PM_SLEEP */
6713
6714 static int igb_resume(struct device *dev)
6715 {
6716 struct pci_dev *pdev = to_pci_dev(dev);
6717 struct net_device *netdev = pci_get_drvdata(pdev);
6718 struct igb_adapter *adapter = netdev_priv(netdev);
6719 struct e1000_hw *hw = &adapter->hw;
6720 u32 err;
6721
6722 pci_set_power_state(pdev, PCI_D0);
6723 pci_restore_state(pdev);
6724 pci_save_state(pdev);
6725
6726 err = pci_enable_device_mem(pdev);
6727 if (err) {
6728 dev_err(&pdev->dev,
6729 "igb: Cannot enable PCI device from suspend\n");
6730 return err;
6731 }
6732 pci_set_master(pdev);
6733
6734 pci_enable_wake(pdev, PCI_D3hot, 0);
6735 pci_enable_wake(pdev, PCI_D3cold, 0);
6736
6737 if (igb_init_interrupt_scheme(adapter)) {
6738 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6739 return -ENOMEM;
6740 }
6741
6742 igb_reset(adapter);
6743
6744 /* let the f/w know that the h/w is now under the control of the
6745 * driver. */
6746 igb_get_hw_control(adapter);
6747
6748 wr32(E1000_WUS, ~0);
6749
6750 if (netdev->flags & IFF_UP) {
6751 err = __igb_open(netdev, true);
6752 if (err)
6753 return err;
6754 }
6755
6756 netif_device_attach(netdev);
6757 return 0;
6758 }
6759
6760 #ifdef CONFIG_PM_RUNTIME
6761 static int igb_runtime_idle(struct device *dev)
6762 {
6763 struct pci_dev *pdev = to_pci_dev(dev);
6764 struct net_device *netdev = pci_get_drvdata(pdev);
6765 struct igb_adapter *adapter = netdev_priv(netdev);
6766
6767 if (!igb_has_link(adapter))
6768 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6769
6770 return -EBUSY;
6771 }
6772
6773 static int igb_runtime_suspend(struct device *dev)
6774 {
6775 struct pci_dev *pdev = to_pci_dev(dev);
6776 int retval;
6777 bool wake;
6778
6779 retval = __igb_shutdown(pdev, &wake, 1);
6780 if (retval)
6781 return retval;
6782
6783 if (wake) {
6784 pci_prepare_to_sleep(pdev);
6785 } else {
6786 pci_wake_from_d3(pdev, false);
6787 pci_set_power_state(pdev, PCI_D3hot);
6788 }
6789
6790 return 0;
6791 }
6792
6793 static int igb_runtime_resume(struct device *dev)
6794 {
6795 return igb_resume(dev);
6796 }
6797 #endif /* CONFIG_PM_RUNTIME */
6798 #endif
6799
6800 static void igb_shutdown(struct pci_dev *pdev)
6801 {
6802 bool wake;
6803
6804 __igb_shutdown(pdev, &wake, 0);
6805
6806 if (system_state == SYSTEM_POWER_OFF) {
6807 pci_wake_from_d3(pdev, wake);
6808 pci_set_power_state(pdev, PCI_D3hot);
6809 }
6810 }
6811
6812 #ifdef CONFIG_NET_POLL_CONTROLLER
6813 /*
6814 * Polling 'interrupt' - used by things like netconsole to send skbs
6815 * without having to re-enable interrupts. It's not called while
6816 * the interrupt routine is executing.
6817 */
6818 static void igb_netpoll(struct net_device *netdev)
6819 {
6820 struct igb_adapter *adapter = netdev_priv(netdev);
6821 struct e1000_hw *hw = &adapter->hw;
6822 struct igb_q_vector *q_vector;
6823 int i;
6824
6825 for (i = 0; i < adapter->num_q_vectors; i++) {
6826 q_vector = adapter->q_vector[i];
6827 if (adapter->msix_entries)
6828 wr32(E1000_EIMC, q_vector->eims_value);
6829 else
6830 igb_irq_disable(adapter);
6831 napi_schedule(&q_vector->napi);
6832 }
6833 }
6834 #endif /* CONFIG_NET_POLL_CONTROLLER */
6835
6836 /**
6837 * igb_io_error_detected - called when PCI error is detected
6838 * @pdev: Pointer to PCI device
6839 * @state: The current pci connection state
6840 *
6841 * This function is called after a PCI bus error affecting
6842 * this device has been detected.
6843 */
6844 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6845 pci_channel_state_t state)
6846 {
6847 struct net_device *netdev = pci_get_drvdata(pdev);
6848 struct igb_adapter *adapter = netdev_priv(netdev);
6849
6850 netif_device_detach(netdev);
6851
6852 if (state == pci_channel_io_perm_failure)
6853 return PCI_ERS_RESULT_DISCONNECT;
6854
6855 if (netif_running(netdev))
6856 igb_down(adapter);
6857 pci_disable_device(pdev);
6858
6859 	/* Request a slot reset. */
6860 return PCI_ERS_RESULT_NEED_RESET;
6861 }
6862
6863 /**
6864 * igb_io_slot_reset - called after the pci bus has been reset.
6865 * @pdev: Pointer to PCI device
6866 *
6867 * Restart the card from scratch, as if from a cold-boot. Implementation
6868 * resembles the first-half of the igb_resume routine.
6869 */
6870 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6871 {
6872 struct net_device *netdev = pci_get_drvdata(pdev);
6873 struct igb_adapter *adapter = netdev_priv(netdev);
6874 struct e1000_hw *hw = &adapter->hw;
6875 pci_ers_result_t result;
6876 int err;
6877
6878 if (pci_enable_device_mem(pdev)) {
6879 dev_err(&pdev->dev,
6880 "Cannot re-enable PCI device after reset.\n");
6881 result = PCI_ERS_RESULT_DISCONNECT;
6882 } else {
6883 pci_set_master(pdev);
6884 pci_restore_state(pdev);
6885 pci_save_state(pdev);
6886
6887 pci_enable_wake(pdev, PCI_D3hot, 0);
6888 pci_enable_wake(pdev, PCI_D3cold, 0);
6889
6890 igb_reset(adapter);
6891 wr32(E1000_WUS, ~0);
6892 result = PCI_ERS_RESULT_RECOVERED;
6893 }
6894
6895 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6896 if (err) {
6897 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6898 "failed 0x%0x\n", err);
6899 /* non-fatal, continue */
6900 }
6901
6902 return result;
6903 }
6904
6905 /**
6906 * igb_io_resume - called when traffic can start flowing again.
6907 * @pdev: Pointer to PCI device
6908 *
6909 * This callback is called when the error recovery driver tells us that
6910 * its OK to resume normal operation. Implementation resembles the
6911 * second-half of the igb_resume routine.
6912 */
6913 static void igb_io_resume(struct pci_dev *pdev)
6914 {
6915 struct net_device *netdev = pci_get_drvdata(pdev);
6916 struct igb_adapter *adapter = netdev_priv(netdev);
6917
6918 if (netif_running(netdev)) {
6919 if (igb_up(adapter)) {
6920 dev_err(&pdev->dev, "igb_up failed after reset\n");
6921 return;
6922 }
6923 }
6924
6925 netif_device_attach(netdev);
6926
6927 /* let the f/w know that the h/w is now under the control of the
6928 * driver. */
6929 igb_get_hw_control(adapter);
6930 }
6931
6932 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6933 u8 qsel)
6934 {
6935 u32 rar_low, rar_high;
6936 struct e1000_hw *hw = &adapter->hw;
6937
6938 /* HW expects these in little endian so we reverse the byte order
6939 * from network order (big endian) to little endian
6940 */
6941 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6942 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6943 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6944
6945 /* Indicate to hardware the Address is Valid. */
6946 rar_high |= E1000_RAH_AV;
6947
6948 if (hw->mac.type == e1000_82575)
6949 rar_high |= E1000_RAH_POOL_1 * qsel;
6950 else
6951 rar_high |= E1000_RAH_POOL_1 << qsel;
6952
6953 wr32(E1000_RAL(index), rar_low);
6954 wrfl();
6955 wr32(E1000_RAH(index), rar_high);
6956 wrfl();
6957 }
6958
6959 static int igb_set_vf_mac(struct igb_adapter *adapter,
6960 int vf, unsigned char *mac_addr)
6961 {
6962 struct e1000_hw *hw = &adapter->hw;
6963 	/* VF MAC addresses start at the end of the receive addresses and move
6964 	 * towards the first, so a collision should not be possible */
6965 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6966
6967 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6968
6969 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6970
6971 return 0;
6972 }
6973
6974 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6975 {
6976 struct igb_adapter *adapter = netdev_priv(netdev);
6977 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6978 return -EINVAL;
6979 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6980 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6981 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6982 " change effective.");
6983 if (test_bit(__IGB_DOWN, &adapter->state)) {
6984 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6985 " but the PF device is not up.\n");
6986 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6987 " attempting to use the VF device.\n");
6988 }
6989 return igb_set_vf_mac(adapter, vf, mac);
6990 }
6991
6992 static int igb_link_mbps(int internal_link_speed)
6993 {
6994 switch (internal_link_speed) {
6995 case SPEED_100:
6996 return 100;
6997 case SPEED_1000:
6998 return 1000;
6999 default:
7000 return 0;
7001 }
7002 }
7003
7004 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7005 int link_speed)
7006 {
7007 int rf_dec, rf_int;
7008 u32 bcnrc_val;
7009
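	/*
	 * The limit is programmed as a fixed-point "rate factor" of
	 * link_speed / tx_rate: rf_int is the integer part and rf_dec the
	 * remainder scaled into E1000_RTTBCNRC_RF_INT_SHIFT fractional bits.
	 * For example, a 1000 Mbps link capped at 300 Mbps gives rf_int = 3
	 * and rf_dec representing the remaining ~0.33.
	 */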
7010 if (tx_rate != 0) {
7011 /* Calculate the rate factor values to set */
7012 rf_int = link_speed / tx_rate;
7013 rf_dec = (link_speed - (rf_int * tx_rate));
7014 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7015
7016 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7017 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7018 E1000_RTTBCNRC_RF_INT_MASK);
7019 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7020 } else {
7021 bcnrc_val = 0;
7022 }
7023
7024 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7025 /*
7026 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
7027 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
7028 */
7029 wr32(E1000_RTTBCNRM, 0x14);
7030 wr32(E1000_RTTBCNRC, bcnrc_val);
7031 }
7032
7033 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7034 {
7035 int actual_link_speed, i;
7036 bool reset_rate = false;
7037
7038 /* VF TX rate limit was not set or not supported */
7039 if ((adapter->vf_rate_link_speed == 0) ||
7040 (adapter->hw.mac.type != e1000_82576))
7041 return;
7042
7043 actual_link_speed = igb_link_mbps(adapter->link_speed);
7044 if (actual_link_speed != adapter->vf_rate_link_speed) {
7045 reset_rate = true;
7046 adapter->vf_rate_link_speed = 0;
7047 dev_info(&adapter->pdev->dev,
7048 "Link speed has been changed. VF Transmit "
7049 "rate is disabled\n");
7050 }
7051
7052 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7053 if (reset_rate)
7054 adapter->vf_data[i].tx_rate = 0;
7055
7056 igb_set_vf_rate_limit(&adapter->hw, i,
7057 adapter->vf_data[i].tx_rate,
7058 actual_link_speed);
7059 }
7060 }
7061
7062 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7063 {
7064 struct igb_adapter *adapter = netdev_priv(netdev);
7065 struct e1000_hw *hw = &adapter->hw;
7066 int actual_link_speed;
7067
7068 if (hw->mac.type != e1000_82576)
7069 return -EOPNOTSUPP;
7070
7071 actual_link_speed = igb_link_mbps(adapter->link_speed);
7072 if ((vf >= adapter->vfs_allocated_count) ||
7073 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7074 (tx_rate < 0) || (tx_rate > actual_link_speed))
7075 return -EINVAL;
7076
7077 adapter->vf_rate_link_speed = actual_link_speed;
7078 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7079 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7080
7081 return 0;
7082 }
7083
7084 static int igb_ndo_get_vf_config(struct net_device *netdev,
7085 int vf, struct ifla_vf_info *ivi)
7086 {
7087 struct igb_adapter *adapter = netdev_priv(netdev);
7088 if (vf >= adapter->vfs_allocated_count)
7089 return -EINVAL;
7090 ivi->vf = vf;
7091 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7092 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7093 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7094 ivi->qos = adapter->vf_data[vf].pf_qos;
7095 return 0;
7096 }
7097
7098 static void igb_vmm_control(struct igb_adapter *adapter)
7099 {
7100 struct e1000_hw *hw = &adapter->hw;
7101 u32 reg;
7102
7103 switch (hw->mac.type) {
7104 case e1000_82575:
7105 case e1000_i210:
7106 case e1000_i211:
7107 default:
7108 		/* replication is not supported on these devices */
7109 return;
7110 case e1000_82576:
7111 /* notify HW that the MAC is adding vlan tags */
7112 reg = rd32(E1000_DTXCTL);
7113 reg |= E1000_DTXCTL_VLAN_ADDED;
7114 wr32(E1000_DTXCTL, reg);
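		/* fall through - 82576 also needs the 82580 RPLOLR setting below */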
7115 case e1000_82580:
7116 /* enable replication vlan tag stripping */
7117 reg = rd32(E1000_RPLOLR);
7118 reg |= E1000_RPLOLR_STRVLAN;
7119 wr32(E1000_RPLOLR, reg);
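		/* fall through */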
7120 case e1000_i350:
7121 /* none of the above registers are supported by i350 */
7122 break;
7123 }
7124
7125 if (adapter->vfs_allocated_count) {
7126 igb_vmdq_set_loopback_pf(hw, true);
7127 igb_vmdq_set_replication_pf(hw, true);
7128 igb_vmdq_set_anti_spoofing_pf(hw, true,
7129 adapter->vfs_allocated_count);
7130 } else {
7131 igb_vmdq_set_loopback_pf(hw, false);
7132 igb_vmdq_set_replication_pf(hw, false);
7133 }
7134 }
7135
7136 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7137 {
7138 struct e1000_hw *hw = &adapter->hw;
7139 u32 dmac_thr;
7140 u16 hwm;
7141
7142 if (hw->mac.type > e1000_82580) {
7143 if (adapter->flags & IGB_FLAG_DMAC) {
7144 u32 reg;
7145
7146 /* force threshold to 0. */
7147 wr32(E1000_DMCTXTH, 0);
7148
7149 /*
7150 * DMA Coalescing high water mark needs to be greater
7151 * than the Rx threshold. Set hwm to PBA - max frame
7152 * size in 16B units, capping it at PBA - 6KB.
7153 */
7154 hwm = 64 * pba - adapter->max_frame_size / 16;
7155 if (hwm < 64 * (pba - 6))
7156 hwm = 64 * (pba - 6);
7157 reg = rd32(E1000_FCRTC);
7158 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7159 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7160 & E1000_FCRTC_RTH_COAL_MASK);
7161 wr32(E1000_FCRTC, reg);
7162
7163 /*
7164 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7165 * frame size, capping it at PBA - 10KB.
7166 */
7167 dmac_thr = pba - adapter->max_frame_size / 512;
7168 if (dmac_thr < pba - 10)
7169 dmac_thr = pba - 10;
7170 reg = rd32(E1000_DMACR);
7171 reg &= ~E1000_DMACR_DMACTHR_MASK;
7172 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7173 & E1000_DMACR_DMACTHR_MASK);
7174
7175 			/* transition to L0s or L1 if available */
7176 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7177
7178 			/* watchdog timer = ~1000 usec, in 32 usec intervals */
7179 reg |= (1000 >> 5);
7180
7181 /* Disable BMC-to-OS Watchdog Enable */
7182 reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7183 wr32(E1000_DMACR, reg);
7184
7185 /*
7186 * no lower threshold to disable
7187 			 * coalescing (smart fifo) - UTRESH=0
7188 */
7189 wr32(E1000_DMCRTRH, 0);
7190
7191 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7192
7193 wr32(E1000_DMCTLX, reg);
7194
7195 /*
7196 * free space in tx packet buffer to wake from
7197 * DMA coal
7198 */
7199 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7200 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7201
7202 /*
7203 * make low power state decision controlled
7204 * by DMA coal
7205 */
7206 reg = rd32(E1000_PCIEMISC);
7207 reg &= ~E1000_PCIEMISC_LX_DECISION;
7208 wr32(E1000_PCIEMISC, reg);
7209 } /* endif adapter->dmac is not disabled */
7210 } else if (hw->mac.type == e1000_82580) {
7211 u32 reg = rd32(E1000_PCIEMISC);
7212 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7213 wr32(E1000_DMACR, 0);
7214 }
7215 }
7216
7217 /* igb_main.c */