igb: Combine all flag info fields into a single tx_flags structure
[deliverable/linux.git] / drivers / net / ethernet / intel / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/ip.h>
49 #include <linux/tcp.h>
50 #include <linux/sctp.h>
51 #include <linux/if_ether.h>
52 #include <linux/aer.h>
53 #include <linux/prefetch.h>
54 #ifdef CONFIG_IGB_DCA
55 #include <linux/dca.h>
56 #endif
57 #include "igb.h"
58
59 #define MAJ 3
60 #define MIN 0
61 #define BUILD 6
62 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63 __stringify(BUILD) "-k"
64 char igb_driver_name[] = "igb";
65 char igb_driver_version[] = DRV_VERSION;
66 static const char igb_driver_string[] =
67 "Intel(R) Gigabit Ethernet Network Driver";
68 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70 static const struct e1000_info *igb_info_tbl[] = {
71 [board_82575] = &e1000_82575_info,
72 };
73
74 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100 /* required last entry */
101 {0, }
102 };
103
104 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106 void igb_reset(struct igb_adapter *);
107 static int igb_setup_all_tx_resources(struct igb_adapter *);
108 static int igb_setup_all_rx_resources(struct igb_adapter *);
109 static void igb_free_all_tx_resources(struct igb_adapter *);
110 static void igb_free_all_rx_resources(struct igb_adapter *);
111 static void igb_setup_mrqc(struct igb_adapter *);
112 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113 static void __devexit igb_remove(struct pci_dev *pdev);
114 static void igb_init_hw_timer(struct igb_adapter *adapter);
115 static int igb_sw_init(struct igb_adapter *);
116 static int igb_open(struct net_device *);
117 static int igb_close(struct net_device *);
118 static void igb_configure_tx(struct igb_adapter *);
119 static void igb_configure_rx(struct igb_adapter *);
120 static void igb_clean_all_tx_rings(struct igb_adapter *);
121 static void igb_clean_all_rx_rings(struct igb_adapter *);
122 static void igb_clean_tx_ring(struct igb_ring *);
123 static void igb_clean_rx_ring(struct igb_ring *);
124 static void igb_set_rx_mode(struct net_device *);
125 static void igb_update_phy_info(unsigned long);
126 static void igb_watchdog(unsigned long);
127 static void igb_watchdog_task(struct work_struct *);
128 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130 struct rtnl_link_stats64 *stats);
131 static int igb_change_mtu(struct net_device *, int);
132 static int igb_set_mac(struct net_device *, void *);
133 static void igb_set_uta(struct igb_adapter *adapter);
134 static irqreturn_t igb_intr(int irq, void *);
135 static irqreturn_t igb_intr_msi(int irq, void *);
136 static irqreturn_t igb_msix_other(int irq, void *);
137 static irqreturn_t igb_msix_ring(int irq, void *);
138 #ifdef CONFIG_IGB_DCA
139 static void igb_update_dca(struct igb_q_vector *);
140 static void igb_setup_dca(struct igb_adapter *);
141 #endif /* CONFIG_IGB_DCA */
142 static int igb_poll(struct napi_struct *, int);
143 static bool igb_clean_tx_irq(struct igb_q_vector *);
144 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146 static void igb_tx_timeout(struct net_device *);
147 static void igb_reset_task(struct work_struct *);
148 static void igb_vlan_mode(struct net_device *netdev, u32 features);
149 static void igb_vlan_rx_add_vid(struct net_device *, u16);
150 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151 static void igb_restore_vlan(struct igb_adapter *);
152 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
153 static void igb_ping_all_vfs(struct igb_adapter *);
154 static void igb_msg_task(struct igb_adapter *);
155 static void igb_vmm_control(struct igb_adapter *);
156 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160 int vf, u16 vlan, u8 qos);
161 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163 struct ifla_vf_info *ivi);
164 static void igb_check_vf_rate_limit(struct igb_adapter *);
165
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state);
static int igb_resume(struct pci_dev *pdev);
#endif
static void igb_shutdown(struct pci_dev *pdev);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p);
/* .next and .priority keep their zero defaults (static storage). */
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *netdev);
#endif
#ifdef CONFIG_PCI_IOV
/* Module parameter; starts at 0 because it has static storage. */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs,
		 "Maximum number of virtual functions to allocate per physical function");
#endif /* CONFIG_PCI_IOV */
189
190 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191 pci_channel_state_t);
192 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193 static void igb_io_resume(struct pci_dev *);
194
195 static struct pci_error_handlers igb_err_handler = {
196 .error_detected = igb_io_error_detected,
197 .slot_reset = igb_io_slot_reset,
198 .resume = igb_io_resume,
199 };
200
201
202 static struct pci_driver igb_driver = {
203 .name = igb_driver_name,
204 .id_table = igb_pci_tbl,
205 .probe = igb_probe,
206 .remove = __devexit_p(igb_remove),
207 #ifdef CONFIG_PM
208 /* Power Management Hooks */
209 .suspend = igb_suspend,
210 .resume = igb_resume,
211 #endif
212 .shutdown = igb_shutdown,
213 .err_handler = &igb_err_handler
214 };
215
216 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218 MODULE_LICENSE("GPL");
219 MODULE_VERSION(DRV_VERSION);
220
221 struct igb_reg_info {
222 u32 ofs;
223 char *name;
224 };
225
226 static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228 /* General Registers */
229 {E1000_CTRL, "CTRL"},
230 {E1000_STATUS, "STATUS"},
231 {E1000_CTRL_EXT, "CTRL_EXT"},
232
233 /* Interrupt Registers */
234 {E1000_ICR, "ICR"},
235
236 /* RX Registers */
237 {E1000_RCTL, "RCTL"},
238 {E1000_RDLEN(0), "RDLEN"},
239 {E1000_RDH(0), "RDH"},
240 {E1000_RDT(0), "RDT"},
241 {E1000_RXDCTL(0), "RXDCTL"},
242 {E1000_RDBAL(0), "RDBAL"},
243 {E1000_RDBAH(0), "RDBAH"},
244
245 /* TX Registers */
246 {E1000_TCTL, "TCTL"},
247 {E1000_TDBAL(0), "TDBAL"},
248 {E1000_TDBAH(0), "TDBAH"},
249 {E1000_TDLEN(0), "TDLEN"},
250 {E1000_TDH(0), "TDH"},
251 {E1000_TDT(0), "TDT"},
252 {E1000_TXDCTL(0), "TXDCTL"},
253 {E1000_TDFH, "TDFH"},
254 {E1000_TDFT, "TDFT"},
255 {E1000_TDFHS, "TDFHS"},
256 {E1000_TDFPC, "TDFPC"},
257
258 /* List Terminator */
259 {}
260 };
261
262 /*
263 * igb_regdump - register printout routine
264 */
265 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266 {
267 int n = 0;
268 char rname[16];
269 u32 regs[8];
270
271 switch (reginfo->ofs) {
272 case E1000_RDLEN(0):
273 for (n = 0; n < 4; n++)
274 regs[n] = rd32(E1000_RDLEN(n));
275 break;
276 case E1000_RDH(0):
277 for (n = 0; n < 4; n++)
278 regs[n] = rd32(E1000_RDH(n));
279 break;
280 case E1000_RDT(0):
281 for (n = 0; n < 4; n++)
282 regs[n] = rd32(E1000_RDT(n));
283 break;
284 case E1000_RXDCTL(0):
285 for (n = 0; n < 4; n++)
286 regs[n] = rd32(E1000_RXDCTL(n));
287 break;
288 case E1000_RDBAL(0):
289 for (n = 0; n < 4; n++)
290 regs[n] = rd32(E1000_RDBAL(n));
291 break;
292 case E1000_RDBAH(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_RDBAH(n));
295 break;
296 case E1000_TDBAL(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_RDBAL(n));
299 break;
300 case E1000_TDBAH(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_TDBAH(n));
303 break;
304 case E1000_TDLEN(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_TDLEN(n));
307 break;
308 case E1000_TDH(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_TDH(n));
311 break;
312 case E1000_TDT(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_TDT(n));
315 break;
316 case E1000_TXDCTL(0):
317 for (n = 0; n < 4; n++)
318 regs[n] = rd32(E1000_TXDCTL(n));
319 break;
320 default:
321 printk(KERN_INFO "%-15s %08x\n",
322 reginfo->name, rd32(reginfo->ofs));
323 return;
324 }
325
326 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327 printk(KERN_INFO "%-15s ", rname);
328 for (n = 0; n < 4; n++)
329 printk(KERN_CONT "%08x ", regs[n]);
330 printk(KERN_CONT "\n");
331 }
332
333 /*
334 * igb_dump - Print registers, tx-rings and rx-rings
335 */
336 static void igb_dump(struct igb_adapter *adapter)
337 {
338 struct net_device *netdev = adapter->netdev;
339 struct e1000_hw *hw = &adapter->hw;
340 struct igb_reg_info *reginfo;
341 int n = 0;
342 struct igb_ring *tx_ring;
343 union e1000_adv_tx_desc *tx_desc;
344 struct my_u0 { u64 a; u64 b; } *u0;
345 struct igb_ring *rx_ring;
346 union e1000_adv_rx_desc *rx_desc;
347 u32 staterr;
348 int i = 0;
349
350 if (!netif_msg_hw(adapter))
351 return;
352
353 /* Print netdevice Info */
354 if (netdev) {
355 dev_info(&adapter->pdev->dev, "Net device Info\n");
356 printk(KERN_INFO "Device Name state "
357 "trans_start last_rx\n");
358 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
359 netdev->name,
360 netdev->state,
361 netdev->trans_start,
362 netdev->last_rx);
363 }
364
365 /* Print Registers */
366 dev_info(&adapter->pdev->dev, "Register Dump\n");
367 printk(KERN_INFO " Register Name Value\n");
368 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
369 reginfo->name; reginfo++) {
370 igb_regdump(hw, reginfo);
371 }
372
373 /* Print TX Ring Summary */
374 if (!netdev || !netif_running(netdev))
375 goto exit;
376
377 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
378 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
379 " leng ntw timestamp\n");
380 for (n = 0; n < adapter->num_tx_queues; n++) {
381 struct igb_tx_buffer *buffer_info;
382 tx_ring = adapter->tx_ring[n];
383 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
384 printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
385 n, tx_ring->next_to_use, tx_ring->next_to_clean,
386 (u64)buffer_info->dma,
387 buffer_info->length,
388 buffer_info->next_to_watch,
389 (u64)buffer_info->time_stamp);
390 }
391
392 /* Print TX Rings */
393 if (!netif_msg_tx_done(adapter))
394 goto rx_ring_summary;
395
396 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
397
398 /* Transmit Descriptor Formats
399 *
400 * Advanced Transmit Descriptor
401 * +--------------------------------------------------------------+
402 * 0 | Buffer Address [63:0] |
403 * +--------------------------------------------------------------+
404 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
405 * +--------------------------------------------------------------+
406 * 63 46 45 40 39 38 36 35 32 31 24 15 0
407 */
408
409 for (n = 0; n < adapter->num_tx_queues; n++) {
410 tx_ring = adapter->tx_ring[n];
411 printk(KERN_INFO "------------------------------------\n");
412 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
413 printk(KERN_INFO "------------------------------------\n");
414 printk(KERN_INFO "T [desc] [address 63:0 ] "
415 "[PlPOCIStDDM Ln] [bi->dma ] "
416 "leng ntw timestamp bi->skb\n");
417
418 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
419 struct igb_tx_buffer *buffer_info;
420 tx_desc = IGB_TX_DESC(tx_ring, i);
421 buffer_info = &tx_ring->tx_buffer_info[i];
422 u0 = (struct my_u0 *)tx_desc;
423 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
424 " %04X %p %016llX %p", i,
425 le64_to_cpu(u0->a),
426 le64_to_cpu(u0->b),
427 (u64)buffer_info->dma,
428 buffer_info->length,
429 buffer_info->next_to_watch,
430 (u64)buffer_info->time_stamp,
431 buffer_info->skb);
432 if (i == tx_ring->next_to_use &&
433 i == tx_ring->next_to_clean)
434 printk(KERN_CONT " NTC/U\n");
435 else if (i == tx_ring->next_to_use)
436 printk(KERN_CONT " NTU\n");
437 else if (i == tx_ring->next_to_clean)
438 printk(KERN_CONT " NTC\n");
439 else
440 printk(KERN_CONT "\n");
441
442 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
443 print_hex_dump(KERN_INFO, "",
444 DUMP_PREFIX_ADDRESS,
445 16, 1, phys_to_virt(buffer_info->dma),
446 buffer_info->length, true);
447 }
448 }
449
450 /* Print RX Rings Summary */
451 rx_ring_summary:
452 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
453 printk(KERN_INFO "Queue [NTU] [NTC]\n");
454 for (n = 0; n < adapter->num_rx_queues; n++) {
455 rx_ring = adapter->rx_ring[n];
456 printk(KERN_INFO " %5d %5X %5X\n", n,
457 rx_ring->next_to_use, rx_ring->next_to_clean);
458 }
459
460 /* Print RX Rings */
461 if (!netif_msg_rx_status(adapter))
462 goto exit;
463
464 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
465
466 /* Advanced Receive Descriptor (Read) Format
467 * 63 1 0
468 * +-----------------------------------------------------+
469 * 0 | Packet Buffer Address [63:1] |A0/NSE|
470 * +----------------------------------------------+------+
471 * 8 | Header Buffer Address [63:1] | DD |
472 * +-----------------------------------------------------+
473 *
474 *
475 * Advanced Receive Descriptor (Write-Back) Format
476 *
477 * 63 48 47 32 31 30 21 20 17 16 4 3 0
478 * +------------------------------------------------------+
479 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
480 * | Checksum Ident | | | | Type | Type |
481 * +------------------------------------------------------+
482 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
483 * +------------------------------------------------------+
484 * 63 48 47 32 31 20 19 0
485 */
486
487 for (n = 0; n < adapter->num_rx_queues; n++) {
488 rx_ring = adapter->rx_ring[n];
489 printk(KERN_INFO "------------------------------------\n");
490 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
491 printk(KERN_INFO "------------------------------------\n");
492 printk(KERN_INFO "R [desc] [ PktBuf A0] "
493 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
494 "<-- Adv Rx Read format\n");
495 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
496 "[vl er S cks ln] ---------------- [bi->skb] "
497 "<-- Adv Rx Write-Back format\n");
498
499 for (i = 0; i < rx_ring->count; i++) {
500 struct igb_rx_buffer *buffer_info;
501 buffer_info = &rx_ring->rx_buffer_info[i];
502 rx_desc = IGB_RX_DESC(rx_ring, i);
503 u0 = (struct my_u0 *)rx_desc;
504 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
505 if (staterr & E1000_RXD_STAT_DD) {
506 /* Descriptor Done */
507 printk(KERN_INFO "RWB[0x%03X] %016llX "
508 "%016llX ---------------- %p", i,
509 le64_to_cpu(u0->a),
510 le64_to_cpu(u0->b),
511 buffer_info->skb);
512 } else {
513 printk(KERN_INFO "R [0x%03X] %016llX "
514 "%016llX %016llX %p", i,
515 le64_to_cpu(u0->a),
516 le64_to_cpu(u0->b),
517 (u64)buffer_info->dma,
518 buffer_info->skb);
519
520 if (netif_msg_pktdata(adapter)) {
521 print_hex_dump(KERN_INFO, "",
522 DUMP_PREFIX_ADDRESS,
523 16, 1,
524 phys_to_virt(buffer_info->dma),
525 IGB_RX_HDR_LEN, true);
526 print_hex_dump(KERN_INFO, "",
527 DUMP_PREFIX_ADDRESS,
528 16, 1,
529 phys_to_virt(
530 buffer_info->page_dma +
531 buffer_info->page_offset),
532 PAGE_SIZE/2, true);
533 }
534 }
535
536 if (i == rx_ring->next_to_use)
537 printk(KERN_CONT " NTU\n");
538 else if (i == rx_ring->next_to_clean)
539 printk(KERN_CONT " NTC\n");
540 else
541 printk(KERN_CONT "\n");
542
543 }
544 }
545
546 exit:
547 return;
548 }
549
550
551 /**
552 * igb_read_clock - read raw cycle counter (to be used by time counter)
553 */
554 static cycle_t igb_read_clock(const struct cyclecounter *tc)
555 {
556 struct igb_adapter *adapter =
557 container_of(tc, struct igb_adapter, cycles);
558 struct e1000_hw *hw = &adapter->hw;
559 u64 stamp = 0;
560 int shift = 0;
561
562 /*
563 * The timestamp latches on lowest register read. For the 82580
564 * the lowest register is SYSTIMR instead of SYSTIML. However we never
565 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
566 */
567 if (hw->mac.type == e1000_82580) {
568 stamp = rd32(E1000_SYSTIMR) >> 8;
569 shift = IGB_82580_TSYNC_SHIFT;
570 }
571
572 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
573 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
574 return stamp;
575 }
576
577 /**
578 * igb_get_hw_dev - return device
579 * used by hardware layer to print debugging information
580 **/
581 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
582 {
583 struct igb_adapter *adapter = hw->back;
584 return adapter->netdev;
585 }
586
587 /**
588 * igb_init_module - Driver Registration Routine
589 *
590 * igb_init_module is the first routine called when the driver is
591 * loaded. All it does is register with the PCI subsystem.
592 **/
593 static int __init igb_init_module(void)
594 {
595 int ret;
596 printk(KERN_INFO "%s - version %s\n",
597 igb_driver_string, igb_driver_version);
598
599 printk(KERN_INFO "%s\n", igb_copyright);
600
601 #ifdef CONFIG_IGB_DCA
602 dca_register_notify(&dca_notifier);
603 #endif
604 ret = pci_register_driver(&igb_driver);
605 return ret;
606 }
607
608 module_init(igb_init_module);
609
610 /**
611 * igb_exit_module - Driver Exit Cleanup Routine
612 *
613 * igb_exit_module is called just before the driver is removed
614 * from memory.
615 **/
616 static void __exit igb_exit_module(void)
617 {
618 #ifdef CONFIG_IGB_DCA
619 dca_unregister_notify(&dca_notifier);
620 #endif
621 pci_unregister_driver(&igb_driver);
622 }
623
624 module_exit(igb_exit_module);
625
/*
 * Map a ring index to its 82576 queue index: bit 0 of @i selects the upper
 * bank (+8), the remaining bits select the slot (0->0, 1->8, 2->1, 3->9...).
 * The argument is parenthesized so expression arguments expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
627 /**
628 * igb_cache_ring_register - Descriptor ring to register mapping
629 * @adapter: board private structure to initialize
630 *
631 * Once we know the feature-set enabled for the device, we'll cache
632 * the register offset the descriptor ring is assigned to.
633 **/
634 static void igb_cache_ring_register(struct igb_adapter *adapter)
635 {
636 int i = 0, j = 0;
637 u32 rbase_offset = adapter->vfs_allocated_count;
638
639 switch (adapter->hw.mac.type) {
640 case e1000_82576:
641 /* The queues are allocated for virtualization such that VF 0
642 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
643 * In order to avoid collision we start at the first free queue
644 * and continue consuming queues in the same sequence
645 */
646 if (adapter->vfs_allocated_count) {
647 for (; i < adapter->rss_queues; i++)
648 adapter->rx_ring[i]->reg_idx = rbase_offset +
649 Q_IDX_82576(i);
650 }
651 case e1000_82575:
652 case e1000_82580:
653 case e1000_i350:
654 default:
655 for (; i < adapter->num_rx_queues; i++)
656 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
657 for (; j < adapter->num_tx_queues; j++)
658 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
659 break;
660 }
661 }
662
663 static void igb_free_queues(struct igb_adapter *adapter)
664 {
665 int i;
666
667 for (i = 0; i < adapter->num_tx_queues; i++) {
668 kfree(adapter->tx_ring[i]);
669 adapter->tx_ring[i] = NULL;
670 }
671 for (i = 0; i < adapter->num_rx_queues; i++) {
672 kfree(adapter->rx_ring[i]);
673 adapter->rx_ring[i] = NULL;
674 }
675 adapter->num_rx_queues = 0;
676 adapter->num_tx_queues = 0;
677 }
678
679 /**
680 * igb_alloc_queues - Allocate memory for all rings
681 * @adapter: board private structure to initialize
682 *
683 * We allocate one ring per queue at run-time since we don't know the
684 * number of queues at compile-time.
685 **/
686 static int igb_alloc_queues(struct igb_adapter *adapter)
687 {
688 struct igb_ring *ring;
689 int i;
690
691 for (i = 0; i < adapter->num_tx_queues; i++) {
692 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
693 if (!ring)
694 goto err;
695 ring->count = adapter->tx_ring_count;
696 ring->queue_index = i;
697 ring->dev = &adapter->pdev->dev;
698 ring->netdev = adapter->netdev;
699 /* For 82575, context index must be unique per ring. */
700 if (adapter->hw.mac.type == e1000_82575)
701 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
702 adapter->tx_ring[i] = ring;
703 }
704
705 for (i = 0; i < adapter->num_rx_queues; i++) {
706 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
707 if (!ring)
708 goto err;
709 ring->count = adapter->rx_ring_count;
710 ring->queue_index = i;
711 ring->dev = &adapter->pdev->dev;
712 ring->netdev = adapter->netdev;
713 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
714 /* set flag indicating ring supports SCTP checksum offload */
715 if (adapter->hw.mac.type >= e1000_82576)
716 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
717 adapter->rx_ring[i] = ring;
718 }
719
720 igb_cache_ring_register(adapter);
721
722 return 0;
723
724 err:
725 igb_free_queues(adapter);
726
727 return -ENOMEM;
728 }
729
730 #define IGB_N0_QUEUE -1
731 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
732 {
733 u32 msixbm = 0;
734 struct igb_adapter *adapter = q_vector->adapter;
735 struct e1000_hw *hw = &adapter->hw;
736 u32 ivar, index;
737 int rx_queue = IGB_N0_QUEUE;
738 int tx_queue = IGB_N0_QUEUE;
739
740 if (q_vector->rx_ring)
741 rx_queue = q_vector->rx_ring->reg_idx;
742 if (q_vector->tx_ring)
743 tx_queue = q_vector->tx_ring->reg_idx;
744
745 switch (hw->mac.type) {
746 case e1000_82575:
747 /* The 82575 assigns vectors using a bitmask, which matches the
748 bitmask for the EICR/EIMS/EIMC registers. To assign one
749 or more queues to a vector, we write the appropriate bits
750 into the MSIXBM register for that vector. */
751 if (rx_queue > IGB_N0_QUEUE)
752 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
753 if (tx_queue > IGB_N0_QUEUE)
754 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
755 if (!adapter->msix_entries && msix_vector == 0)
756 msixbm |= E1000_EIMS_OTHER;
757 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
758 q_vector->eims_value = msixbm;
759 break;
760 case e1000_82576:
761 /* 82576 uses a table-based method for assigning vectors.
762 Each queue has a single entry in the table to which we write
763 a vector number along with a "valid" bit. Sadly, the layout
764 of the table is somewhat counterintuitive. */
765 if (rx_queue > IGB_N0_QUEUE) {
766 index = (rx_queue & 0x7);
767 ivar = array_rd32(E1000_IVAR0, index);
768 if (rx_queue < 8) {
769 /* vector goes into low byte of register */
770 ivar = ivar & 0xFFFFFF00;
771 ivar |= msix_vector | E1000_IVAR_VALID;
772 } else {
773 /* vector goes into third byte of register */
774 ivar = ivar & 0xFF00FFFF;
775 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
776 }
777 array_wr32(E1000_IVAR0, index, ivar);
778 }
779 if (tx_queue > IGB_N0_QUEUE) {
780 index = (tx_queue & 0x7);
781 ivar = array_rd32(E1000_IVAR0, index);
782 if (tx_queue < 8) {
783 /* vector goes into second byte of register */
784 ivar = ivar & 0xFFFF00FF;
785 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
786 } else {
787 /* vector goes into high byte of register */
788 ivar = ivar & 0x00FFFFFF;
789 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
790 }
791 array_wr32(E1000_IVAR0, index, ivar);
792 }
793 q_vector->eims_value = 1 << msix_vector;
794 break;
795 case e1000_82580:
796 case e1000_i350:
797 /* 82580 uses the same table-based approach as 82576 but has fewer
798 entries as a result we carry over for queues greater than 4. */
799 if (rx_queue > IGB_N0_QUEUE) {
800 index = (rx_queue >> 1);
801 ivar = array_rd32(E1000_IVAR0, index);
802 if (rx_queue & 0x1) {
803 /* vector goes into third byte of register */
804 ivar = ivar & 0xFF00FFFF;
805 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
806 } else {
807 /* vector goes into low byte of register */
808 ivar = ivar & 0xFFFFFF00;
809 ivar |= msix_vector | E1000_IVAR_VALID;
810 }
811 array_wr32(E1000_IVAR0, index, ivar);
812 }
813 if (tx_queue > IGB_N0_QUEUE) {
814 index = (tx_queue >> 1);
815 ivar = array_rd32(E1000_IVAR0, index);
816 if (tx_queue & 0x1) {
817 /* vector goes into high byte of register */
818 ivar = ivar & 0x00FFFFFF;
819 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
820 } else {
821 /* vector goes into second byte of register */
822 ivar = ivar & 0xFFFF00FF;
823 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
824 }
825 array_wr32(E1000_IVAR0, index, ivar);
826 }
827 q_vector->eims_value = 1 << msix_vector;
828 break;
829 default:
830 BUG();
831 break;
832 }
833
834 /* add q_vector eims value to global eims_enable_mask */
835 adapter->eims_enable_mask |= q_vector->eims_value;
836
837 /* configure q_vector to set itr on first interrupt */
838 q_vector->set_itr = 1;
839 }
840
841 /**
842 * igb_configure_msix - Configure MSI-X hardware
843 *
844 * igb_configure_msix sets up the hardware to properly
845 * generate MSI-X interrupts.
846 **/
847 static void igb_configure_msix(struct igb_adapter *adapter)
848 {
849 u32 tmp;
850 int i, vector = 0;
851 struct e1000_hw *hw = &adapter->hw;
852
853 adapter->eims_enable_mask = 0;
854
855 /* set vector for other causes, i.e. link changes */
856 switch (hw->mac.type) {
857 case e1000_82575:
858 tmp = rd32(E1000_CTRL_EXT);
859 /* enable MSI-X PBA support*/
860 tmp |= E1000_CTRL_EXT_PBA_CLR;
861
862 /* Auto-Mask interrupts upon ICR read. */
863 tmp |= E1000_CTRL_EXT_EIAME;
864 tmp |= E1000_CTRL_EXT_IRCA;
865
866 wr32(E1000_CTRL_EXT, tmp);
867
868 /* enable msix_other interrupt */
869 array_wr32(E1000_MSIXBM(0), vector++,
870 E1000_EIMS_OTHER);
871 adapter->eims_other = E1000_EIMS_OTHER;
872
873 break;
874
875 case e1000_82576:
876 case e1000_82580:
877 case e1000_i350:
878 /* Turn on MSI-X capability first, or our settings
879 * won't stick. And it will take days to debug. */
880 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
881 E1000_GPIE_PBA | E1000_GPIE_EIAME |
882 E1000_GPIE_NSICR);
883
884 /* enable msix_other interrupt */
885 adapter->eims_other = 1 << vector;
886 tmp = (vector++ | E1000_IVAR_VALID) << 8;
887
888 wr32(E1000_IVAR_MISC, tmp);
889 break;
890 default:
891 /* do nothing, since nothing else supports MSI-X */
892 break;
893 } /* switch (hw->mac.type) */
894
895 adapter->eims_enable_mask |= adapter->eims_other;
896
897 for (i = 0; i < adapter->num_q_vectors; i++)
898 igb_assign_vector(adapter->q_vector[i], vector++);
899
900 wrfl();
901 }
902
903 /**
904 * igb_request_msix - Initialize MSI-X interrupts
905 *
906 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
907 * kernel.
908 **/
909 static int igb_request_msix(struct igb_adapter *adapter)
910 {
911 struct net_device *netdev = adapter->netdev;
912 struct e1000_hw *hw = &adapter->hw;
913 int i, err = 0, vector = 0;
914
915 err = request_irq(adapter->msix_entries[vector].vector,
916 igb_msix_other, 0, netdev->name, adapter);
917 if (err)
918 goto out;
919 vector++;
920
921 for (i = 0; i < adapter->num_q_vectors; i++) {
922 struct igb_q_vector *q_vector = adapter->q_vector[i];
923
924 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
925
926 if (q_vector->rx_ring && q_vector->tx_ring)
927 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
928 q_vector->rx_ring->queue_index);
929 else if (q_vector->tx_ring)
930 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
931 q_vector->tx_ring->queue_index);
932 else if (q_vector->rx_ring)
933 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
934 q_vector->rx_ring->queue_index);
935 else
936 sprintf(q_vector->name, "%s-unused", netdev->name);
937
938 err = request_irq(adapter->msix_entries[vector].vector,
939 igb_msix_ring, 0, q_vector->name,
940 q_vector);
941 if (err)
942 goto out;
943 vector++;
944 }
945
946 igb_configure_msix(adapter);
947 return 0;
948 out:
949 return err;
950 }
951
952 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
953 {
954 if (adapter->msix_entries) {
955 pci_disable_msix(adapter->pdev);
956 kfree(adapter->msix_entries);
957 adapter->msix_entries = NULL;
958 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
959 pci_disable_msi(adapter->pdev);
960 }
961 }
962
963 /**
964 * igb_free_q_vectors - Free memory allocated for interrupt vectors
965 * @adapter: board private structure to initialize
966 *
967 * This function frees the memory allocated to the q_vectors. In addition if
968 * NAPI is enabled it will delete any references to the NAPI struct prior
969 * to freeing the q_vector.
970 **/
971 static void igb_free_q_vectors(struct igb_adapter *adapter)
972 {
973 int v_idx;
974
975 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
976 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
977 adapter->q_vector[v_idx] = NULL;
978 if (!q_vector)
979 continue;
980 netif_napi_del(&q_vector->napi);
981 kfree(q_vector);
982 }
983 adapter->num_q_vectors = 0;
984 }
985
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* release the queues, then the q_vectors, then MSI-X/MSI itself */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
998
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 * @adapter: board private structure
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.  MSI-X is tried first, with one
 * vector per queue vector plus one for link status.  If the entry table
 * cannot be allocated, or pci_enable_msix() returns non-zero, the adapter is
 * reduced to a single paired rx/tx queue and MSI is attempted instead.
 *
 * Returns the result of netif_set_real_num_rx_queues(); the result of
 * netif_set_real_num_tx_queues() is not checked.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	/* request table entries 0..numvecs-1 in order */
	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	/* MSI-X was not granted: drop the entry table before trying MSI */
	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	/* single paired queue, no VFs, one q_vector */
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	/* if MSI cannot be enabled the flag stays clear (legacy interrupt) */
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
1077
1078 /**
1079 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1080 * @adapter: board private structure to initialize
1081 *
1082 * We allocate one q_vector per queue interrupt. If allocation fails we
1083 * return -ENOMEM.
1084 **/
1085 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1086 {
1087 struct igb_q_vector *q_vector;
1088 struct e1000_hw *hw = &adapter->hw;
1089 int v_idx;
1090
1091 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1092 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1093 if (!q_vector)
1094 goto err_out;
1095 q_vector->adapter = adapter;
1096 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1097 q_vector->itr_val = IGB_START_ITR;
1098 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1099 adapter->q_vector[v_idx] = q_vector;
1100 }
1101 return 0;
1102
1103 err_out:
1104 igb_free_q_vectors(adapter);
1105 return -ENOMEM;
1106 }
1107
1108 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1109 int ring_idx, int v_idx)
1110 {
1111 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1112
1113 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1114 q_vector->rx_ring->q_vector = q_vector;
1115 q_vector->itr_val = adapter->rx_itr_setting;
1116 if (q_vector->itr_val && q_vector->itr_val <= 3)
1117 q_vector->itr_val = IGB_START_ITR;
1118 }
1119
1120 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1121 int ring_idx, int v_idx)
1122 {
1123 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1124
1125 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1126 q_vector->tx_ring->q_vector = q_vector;
1127 q_vector->itr_val = adapter->tx_itr_setting;
1128 q_vector->tx_work_limit = adapter->tx_work_limit;
1129 if (q_vector->itr_val && q_vector->itr_val <= 3)
1130 q_vector->itr_val = IGB_START_ITR;
1131 }
1132
1133 /**
1134 * igb_map_ring_to_vector - maps allocated queues to vectors
1135 *
1136 * This function maps the recently allocated queues to vectors.
1137 **/
1138 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1139 {
1140 int i;
1141 int v_idx = 0;
1142
1143 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1144 (adapter->num_q_vectors < adapter->num_tx_queues))
1145 return -ENOMEM;
1146
1147 if (adapter->num_q_vectors >=
1148 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1149 for (i = 0; i < adapter->num_rx_queues; i++)
1150 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1151 for (i = 0; i < adapter->num_tx_queues; i++)
1152 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1153 } else {
1154 for (i = 0; i < adapter->num_rx_queues; i++) {
1155 if (i < adapter->num_tx_queues)
1156 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1157 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1158 }
1159 for (; i < adapter->num_tx_queues; i++)
1160 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1161 }
1162 return 0;
1163 }
1164
1165 /**
1166 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1167 *
1168 * This function initializes the interrupts and allocates all of the queues.
1169 **/
1170 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1171 {
1172 struct pci_dev *pdev = adapter->pdev;
1173 int err;
1174
1175 err = igb_set_interrupt_capability(adapter);
1176 if (err)
1177 return err;
1178
1179 err = igb_alloc_q_vectors(adapter);
1180 if (err) {
1181 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1182 goto err_alloc_q_vectors;
1183 }
1184
1185 err = igb_alloc_queues(adapter);
1186 if (err) {
1187 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1188 goto err_alloc_queues;
1189 }
1190
1191 err = igb_map_ring_to_vector(adapter);
1192 if (err) {
1193 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1194 goto err_map_queues;
1195 }
1196
1197
1198 return 0;
1199 err_map_queues:
1200 igb_free_queues(adapter);
1201 err_alloc_queues:
1202 igb_free_q_vectors(adapter);
1203 err_alloc_q_vectors:
1204 igb_reset_interrupt_capability(adapter);
1205 return err;
1206 }
1207
/**
 * igb_request_irq - initialize interrupts
 * @adapter: board private structure
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.  The order tried is MSI-X (when
 * an MSI-X entry table exists), then MSI (when IGB_FLAG_HAS_MSI is set),
 * then a shared legacy interrupt.
 *
 * Returns 0 on success or the error from the last attempt.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		/* rebuild the scheme around a single queue pair and vector */
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		/* NOTE(review): results of the two setup calls are not checked */
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	/* last resort: legacy interrupt line, which may be shared */
	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
1273
1274 static void igb_free_irq(struct igb_adapter *adapter)
1275 {
1276 if (adapter->msix_entries) {
1277 int vector = 0, i;
1278
1279 free_irq(adapter->msix_entries[vector++].vector, adapter);
1280
1281 for (i = 0; i < adapter->num_q_vectors; i++) {
1282 struct igb_q_vector *q_vector = adapter->q_vector[i];
1283 free_irq(adapter->msix_entries[vector++].vector,
1284 q_vector);
1285 }
1286 } else {
1287 free_irq(adapter->pdev->irq, adapter);
1288 }
1289 }
1290
1291 /**
1292 * igb_irq_disable - Mask off interrupt generation on the NIC
1293 * @adapter: board private structure
1294 **/
1295 static void igb_irq_disable(struct igb_adapter *adapter)
1296 {
1297 struct e1000_hw *hw = &adapter->hw;
1298
1299 /*
1300 * we need to be careful when disabling interrupts. The VFs are also
1301 * mapped into these registers and so clearing the bits can cause
1302 * issues on the VF drivers so we only need to clear what we set
1303 */
1304 if (adapter->msix_entries) {
1305 u32 regval = rd32(E1000_EIAM);
1306 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1307 wr32(E1000_EIMC, adapter->eims_enable_mask);
1308 regval = rd32(E1000_EIAC);
1309 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1310 }
1311
1312 wr32(E1000_IAM, 0);
1313 wr32(E1000_IMC, ~0);
1314 wrfl();
1315 if (adapter->msix_entries) {
1316 int i;
1317 for (i = 0; i < adapter->num_q_vectors; i++)
1318 synchronize_irq(adapter->msix_entries[i].vector);
1319 } else {
1320 synchronize_irq(adapter->pdev->irq);
1321 }
1322 }
1323
1324 /**
1325 * igb_irq_enable - Enable default interrupt generation settings
1326 * @adapter: board private structure
1327 **/
1328 static void igb_irq_enable(struct igb_adapter *adapter)
1329 {
1330 struct e1000_hw *hw = &adapter->hw;
1331
1332 if (adapter->msix_entries) {
1333 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1334 u32 regval = rd32(E1000_EIAC);
1335 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1336 regval = rd32(E1000_EIAM);
1337 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1338 wr32(E1000_EIMS, adapter->eims_enable_mask);
1339 if (adapter->vfs_allocated_count) {
1340 wr32(E1000_MBVFIMR, 0xFF);
1341 ims |= E1000_IMS_VMMB;
1342 }
1343 if (adapter->hw.mac.type == e1000_82580)
1344 ims |= E1000_IMS_DRSTA;
1345
1346 wr32(E1000_IMS, ims);
1347 } else {
1348 wr32(E1000_IMS, IMS_ENABLE_MASK |
1349 E1000_IMS_DRSTA);
1350 wr32(E1000_IAM, IMS_ENABLE_MASK |
1351 E1000_IMS_DRSTA);
1352 }
1353 }
1354
1355 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1356 {
1357 struct e1000_hw *hw = &adapter->hw;
1358 u16 vid = adapter->hw.mng_cookie.vlan_id;
1359 u16 old_vid = adapter->mng_vlan_id;
1360
1361 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1362 /* add VID to filter table */
1363 igb_vfta_set(hw, vid, true);
1364 adapter->mng_vlan_id = vid;
1365 } else {
1366 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1367 }
1368
1369 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1370 (vid != old_vid) &&
1371 !test_bit(old_vid, adapter->active_vlans)) {
1372 /* remove VID from filter table */
1373 igb_vfta_set(hw, old_vid, false);
1374 }
1375 }
1376
1377 /**
1378 * igb_release_hw_control - release control of the h/w to f/w
1379 * @adapter: address of board private structure
1380 *
1381 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1382 * For ASF and Pass Through versions of f/w this means that the
1383 * driver is no longer loaded.
1384 *
1385 **/
1386 static void igb_release_hw_control(struct igb_adapter *adapter)
1387 {
1388 struct e1000_hw *hw = &adapter->hw;
1389 u32 ctrl_ext;
1390
1391 /* Let firmware take over control of h/w */
1392 ctrl_ext = rd32(E1000_CTRL_EXT);
1393 wr32(E1000_CTRL_EXT,
1394 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1395 }
1396
1397 /**
1398 * igb_get_hw_control - get control of the h/w from f/w
1399 * @adapter: address of board private structure
1400 *
1401 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1402 * For ASF and Pass Through versions of f/w this means that
1403 * the driver is loaded.
1404 *
1405 **/
1406 static void igb_get_hw_control(struct igb_adapter *adapter)
1407 {
1408 struct e1000_hw *hw = &adapter->hw;
1409 u32 ctrl_ext;
1410
1411 /* Let firmware know the driver has taken over */
1412 ctrl_ext = rd32(E1000_CTRL_EXT);
1413 wr32(E1000_CTRL_EXT,
1414 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1415 }
1416
1417 /**
1418 * igb_configure - configure the hardware for RX and TX
1419 * @adapter: private board structure
1420 **/
1421 static void igb_configure(struct igb_adapter *adapter)
1422 {
1423 struct net_device *netdev = adapter->netdev;
1424 int i;
1425
1426 igb_get_hw_control(adapter);
1427 igb_set_rx_mode(netdev);
1428
1429 igb_restore_vlan(adapter);
1430
1431 igb_setup_tctl(adapter);
1432 igb_setup_mrqc(adapter);
1433 igb_setup_rctl(adapter);
1434
1435 igb_configure_tx(adapter);
1436 igb_configure_rx(adapter);
1437
1438 igb_rx_fifo_flush_82575(&adapter->hw);
1439
1440 /* call igb_desc_unused which always leaves
1441 * at least 1 descriptor unused to make sure
1442 * next_to_use != next_to_clean */
1443 for (i = 0; i < adapter->num_rx_queues; i++) {
1444 struct igb_ring *ring = adapter->rx_ring[i];
1445 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1446 }
1447 }
1448
1449 /**
1450 * igb_power_up_link - Power up the phy/serdes link
1451 * @adapter: address of board private structure
1452 **/
1453 void igb_power_up_link(struct igb_adapter *adapter)
1454 {
1455 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1456 igb_power_up_phy_copper(&adapter->hw);
1457 else
1458 igb_power_up_serdes_link_82575(&adapter->hw);
1459 }
1460
1461 /**
1462 * igb_power_down_link - Power down the phy/serdes link
1463 * @adapter: address of board private structure
1464 */
1465 static void igb_power_down_link(struct igb_adapter *adapter)
1466 {
1467 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1468 igb_power_down_phy_copper_82575(&adapter->hw);
1469 else
1470 igb_shutdown_serdes_link_82575(&adapter->hw);
1471 }
1472
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 *
 * Reprograms the hardware, clears the DOWN state bit, enables NAPI on every
 * q_vector, restores the interrupt routing, enables interrupts, starts the
 * tx queues and schedules the watchdog task.
 *
 * Always returns 0.
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	/* MSI-X needs the full vector setup; otherwise only vector 0 is used */
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
1515
/**
 * igb_down - quiesce the interface
 * @adapter: board private structure
 *
 * Sets the DOWN state bit, disables receives and transmits in hardware,
 * stops the tx queues, disables NAPI and interrupts, stops the timers,
 * drops the carrier, records the statistics, resets the hardware (unless
 * the PCI channel is offline) and cleans all tx and rx rings.
 **/
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset */
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	/* skip the hardware reset while the PCI channel is offline */
	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
1572
1573 void igb_reinit_locked(struct igb_adapter *adapter)
1574 {
1575 WARN_ON(in_interrupt());
1576 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1577 msleep(1);
1578 igb_down(adapter);
1579 igb_up(adapter);
1580 clear_bit(__IGB_RESETTING, &adapter->state);
1581 }
1582
/**
 * igb_reset - reset and re-initialize the hardware
 * @adapter: board private structure
 *
 * Chooses the packet buffer allocation for the MAC type, derives the flow
 * control water marks from it, quiesces any VFs, resets and re-initializes
 * the MAC, programs DMA coalescing where the code enables it, and restores
 * the management VLAN and the VLAN ethertype.
 **/
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	/* NOTE(review): hwm is 16 bits wide while (pba << 10) is computed in
	 * 32 bits; confirm large pba values cannot be truncated here */
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit full frames
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus two full frames */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* quiesce the VFs before the reset */
	if (adapter->vfs_allocated_count) {
		int i;
		/* keep only the PF_SET_MAC flag of every VF */
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");
	/* DMA coalescing is only programmed for MAC types after 82580 */
	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/*
			 * DMA Coalescing high water mark needs to be higher
			 * than the Rx threshold.  The Rx threshold is
			 * currently pba - 6, so we should use a high water
			 * mark of pba - 4.
			 */
			hwm = (pba - 4) << 10;

			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
				& E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available..*/
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer= +-1000 usec in 32usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/* no lower threshold to disable coalescing (smart fifo)
			 * -UTRESH=0 */
			wr32(E1000_DMCRTRH, 0);

			/* write the (pba - 4) KB high water mark computed above */
			wr32(E1000_FCRTC, hwm);

			/*
			 * This sets the time to wait before requesting
			 * transition to low power state to the number of
			 * usecs needed to receive one 512 byte frame at
			 * gigabit line rate
			 */
			reg = rd32(E1000_DMCTLX);
			reg |= IGB_DMCTLX_DCFLUSH_DIS;

			/* Delay 255 usec before entering Lx state. */
			reg |= 0xFF;
			wr32(E1000_DMCTLX, reg);

			/* free space in Tx packet buffer to wake from DMAC */
			wr32(E1000_DMCTXTH,
			     (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
			     >> 6);

			/* make low power state decision controlled by DMAC */
			reg = rd32(E1000_PCIEMISC);
			reg |= E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* end if IGB_FLAG_DMAC set */
	}
	/* on 82580 the LX_DECISION bit is always cleared */
	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	/* keep the link powered down while the interface is not running */
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
1756
1757 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1758 {
1759 /*
1760 * Since there is no support for separate rx/tx vlan accel
1761 * enable/disable make sure tx flag is always in same state as rx.
1762 */
1763 if (features & NETIF_F_HW_VLAN_RX)
1764 features |= NETIF_F_HW_VLAN_TX;
1765 else
1766 features &= ~NETIF_F_HW_VLAN_TX;
1767
1768 return features;
1769 }
1770
1771 static int igb_set_features(struct net_device *netdev, u32 features)
1772 {
1773 struct igb_adapter *adapter = netdev_priv(netdev);
1774 int i;
1775 u32 changed = netdev->features ^ features;
1776
1777 for (i = 0; i < adapter->num_rx_queues; i++) {
1778 if (features & NETIF_F_RXCSUM)
1779 adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1780 else
1781 adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1782 }
1783
1784 if (changed & NETIF_F_HW_VLAN_RX)
1785 igb_vlan_mode(netdev, features);
1786
1787 return 0;
1788 }
1789
/*
 * net_device callbacks implemented by this driver; igb_probe installs the
 * table in netdev->netdev_ops.
 */
static const struct net_device_ops igb_netdev_ops = {
	/* interface lifecycle and data path */
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	/* VLAN filter maintenance */
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	/* per-VF configuration */
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	/* feature negotiation */
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
1813
1814 /**
1815 * igb_probe - Device Initialization Routine
1816 * @pdev: PCI device information struct
1817 * @ent: entry in igb_pci_tbl
1818 *
1819 * Returns 0 on success, negative on failure
1820 *
1821 * igb_probe initializes an adapter identified by a pci_dev structure.
1822 * The OS initialization, configuring of the adapter private structure,
1823 * and a hardware reset occur.
1824 **/
1825 static int __devinit igb_probe(struct pci_dev *pdev,
1826 const struct pci_device_id *ent)
1827 {
1828 struct net_device *netdev;
1829 struct igb_adapter *adapter;
1830 struct e1000_hw *hw;
1831 u16 eeprom_data = 0;
1832 s32 ret_val;
1833 static int global_quad_port_a; /* global quad port a indication */
1834 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1835 unsigned long mmio_start, mmio_len;
1836 int err, pci_using_dac;
1837 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1838 u8 part_str[E1000_PBANUM_LENGTH];
1839
1840 /* Catch broken hardware that put the wrong VF device ID in
1841 * the PCIe SR-IOV capability.
1842 */
1843 if (pdev->is_virtfn) {
1844 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1845 pci_name(pdev), pdev->vendor, pdev->device);
1846 return -EINVAL;
1847 }
1848
1849 err = pci_enable_device_mem(pdev);
1850 if (err)
1851 return err;
1852
1853 pci_using_dac = 0;
1854 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1855 if (!err) {
1856 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1857 if (!err)
1858 pci_using_dac = 1;
1859 } else {
1860 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1861 if (err) {
1862 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1863 if (err) {
1864 dev_err(&pdev->dev, "No usable DMA "
1865 "configuration, aborting\n");
1866 goto err_dma;
1867 }
1868 }
1869 }
1870
1871 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1872 IORESOURCE_MEM),
1873 igb_driver_name);
1874 if (err)
1875 goto err_pci_reg;
1876
1877 pci_enable_pcie_error_reporting(pdev);
1878
1879 pci_set_master(pdev);
1880 pci_save_state(pdev);
1881
1882 err = -ENOMEM;
1883 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1884 IGB_MAX_TX_QUEUES);
1885 if (!netdev)
1886 goto err_alloc_etherdev;
1887
1888 SET_NETDEV_DEV(netdev, &pdev->dev);
1889
1890 pci_set_drvdata(pdev, netdev);
1891 adapter = netdev_priv(netdev);
1892 adapter->netdev = netdev;
1893 adapter->pdev = pdev;
1894 hw = &adapter->hw;
1895 hw->back = adapter;
1896 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1897
1898 mmio_start = pci_resource_start(pdev, 0);
1899 mmio_len = pci_resource_len(pdev, 0);
1900
1901 err = -EIO;
1902 hw->hw_addr = ioremap(mmio_start, mmio_len);
1903 if (!hw->hw_addr)
1904 goto err_ioremap;
1905
1906 netdev->netdev_ops = &igb_netdev_ops;
1907 igb_set_ethtool_ops(netdev);
1908 netdev->watchdog_timeo = 5 * HZ;
1909
1910 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1911
1912 netdev->mem_start = mmio_start;
1913 netdev->mem_end = mmio_start + mmio_len;
1914
1915 /* PCI config space info */
1916 hw->vendor_id = pdev->vendor;
1917 hw->device_id = pdev->device;
1918 hw->revision_id = pdev->revision;
1919 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1920 hw->subsystem_device_id = pdev->subsystem_device;
1921
1922 /* Copy the default MAC, PHY and NVM function pointers */
1923 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1924 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1925 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1926 /* Initialize skew-specific constants */
1927 err = ei->get_invariants(hw);
1928 if (err)
1929 goto err_sw_init;
1930
1931 /* setup the private structure */
1932 err = igb_sw_init(adapter);
1933 if (err)
1934 goto err_sw_init;
1935
1936 igb_get_bus_info_pcie(hw);
1937
1938 hw->phy.autoneg_wait_to_complete = false;
1939
1940 /* Copper options */
1941 if (hw->phy.media_type == e1000_media_type_copper) {
1942 hw->phy.mdix = AUTO_ALL_MODES;
1943 hw->phy.disable_polarity_correction = false;
1944 hw->phy.ms_type = e1000_ms_hw_default;
1945 }
1946
1947 if (igb_check_reset_block(hw))
1948 dev_info(&pdev->dev,
1949 "PHY reset is blocked due to SOL/IDER session.\n");
1950
1951 netdev->hw_features = NETIF_F_SG |
1952 NETIF_F_IP_CSUM |
1953 NETIF_F_IPV6_CSUM |
1954 NETIF_F_TSO |
1955 NETIF_F_TSO6 |
1956 NETIF_F_RXCSUM |
1957 NETIF_F_HW_VLAN_RX;
1958
1959 netdev->features = netdev->hw_features |
1960 NETIF_F_HW_VLAN_TX |
1961 NETIF_F_HW_VLAN_FILTER;
1962
1963 netdev->vlan_features |= NETIF_F_TSO;
1964 netdev->vlan_features |= NETIF_F_TSO6;
1965 netdev->vlan_features |= NETIF_F_IP_CSUM;
1966 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1967 netdev->vlan_features |= NETIF_F_SG;
1968
1969 if (pci_using_dac) {
1970 netdev->features |= NETIF_F_HIGHDMA;
1971 netdev->vlan_features |= NETIF_F_HIGHDMA;
1972 }
1973
1974 if (hw->mac.type >= e1000_82576) {
1975 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1976 netdev->features |= NETIF_F_SCTP_CSUM;
1977 }
1978
1979 netdev->priv_flags |= IFF_UNICAST_FLT;
1980
1981 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1982
1983 /* before reading the NVM, reset the controller to put the device in a
1984 * known good starting state */
1985 hw->mac.ops.reset_hw(hw);
1986
1987 /* make sure the NVM is good */
1988 if (hw->nvm.ops.validate(hw) < 0) {
1989 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1990 err = -EIO;
1991 goto err_eeprom;
1992 }
1993
1994 /* copy the MAC address out of the NVM */
1995 if (hw->mac.ops.read_mac_addr(hw))
1996 dev_err(&pdev->dev, "NVM Read Error\n");
1997
1998 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1999 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2000
2001 if (!is_valid_ether_addr(netdev->perm_addr)) {
2002 dev_err(&pdev->dev, "Invalid MAC Address\n");
2003 err = -EIO;
2004 goto err_eeprom;
2005 }
2006
2007 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2008 (unsigned long) adapter);
2009 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2010 (unsigned long) adapter);
2011
2012 INIT_WORK(&adapter->reset_task, igb_reset_task);
2013 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2014
2015 /* Initialize link properties that are user-changeable */
2016 adapter->fc_autoneg = true;
2017 hw->mac.autoneg = true;
2018 hw->phy.autoneg_advertised = 0x2f;
2019
2020 hw->fc.requested_mode = e1000_fc_default;
2021 hw->fc.current_mode = e1000_fc_default;
2022
2023 igb_validate_mdi_setting(hw);
2024
2025 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2026 * enable the ACPI Magic Packet filter
2027 */
2028
2029 if (hw->bus.func == 0)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2031 else if (hw->mac.type >= e1000_82580)
2032 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2033 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2034 &eeprom_data);
2035 else if (hw->bus.func == 1)
2036 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2037
2038 if (eeprom_data & eeprom_apme_mask)
2039 adapter->eeprom_wol |= E1000_WUFC_MAG;
2040
2041 /* now that we have the eeprom settings, apply the special cases where
2042 * the eeprom may be wrong or the board simply won't support wake on
2043 * lan on a particular port */
2044 switch (pdev->device) {
2045 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2046 adapter->eeprom_wol = 0;
2047 break;
2048 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2049 case E1000_DEV_ID_82576_FIBER:
2050 case E1000_DEV_ID_82576_SERDES:
2051 /* Wake events only supported on port A for dual fiber
2052 * regardless of eeprom setting */
2053 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2054 adapter->eeprom_wol = 0;
2055 break;
2056 case E1000_DEV_ID_82576_QUAD_COPPER:
2057 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2058 /* if quad port adapter, disable WoL on all but port A */
2059 if (global_quad_port_a != 0)
2060 adapter->eeprom_wol = 0;
2061 else
2062 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2063 /* Reset for multiple quad port adapters */
2064 if (++global_quad_port_a == 4)
2065 global_quad_port_a = 0;
2066 break;
2067 }
2068
2069 /* initialize the wol settings based on the eeprom settings */
2070 adapter->wol = adapter->eeprom_wol;
2071 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2072
2073 /* reset the hardware with the new settings */
2074 igb_reset(adapter);
2075
2076 /* let the f/w know that the h/w is now under the control of the
2077 * driver. */
2078 igb_get_hw_control(adapter);
2079
2080 strcpy(netdev->name, "eth%d");
2081 err = register_netdev(netdev);
2082 if (err)
2083 goto err_register;
2084
2085 igb_vlan_mode(netdev, netdev->features);
2086
2087 /* carrier off reporting is important to ethtool even BEFORE open */
2088 netif_carrier_off(netdev);
2089
2090 #ifdef CONFIG_IGB_DCA
2091 if (dca_add_requester(&pdev->dev) == 0) {
2092 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2093 dev_info(&pdev->dev, "DCA enabled\n");
2094 igb_setup_dca(adapter);
2095 }
2096
2097 #endif
2098 /* do hw tstamp init after resetting */
2099 igb_init_hw_timer(adapter);
2100
2101 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2102 /* print bus type/speed/width info */
2103 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2104 netdev->name,
2105 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2106 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2107 "unknown"),
2108 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2109 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2110 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2111 "unknown"),
2112 netdev->dev_addr);
2113
2114 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2115 if (ret_val)
2116 strcpy(part_str, "Unknown");
2117 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2118 dev_info(&pdev->dev,
2119 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2120 adapter->msix_entries ? "MSI-X" :
2121 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2122 adapter->num_rx_queues, adapter->num_tx_queues);
2123 switch (hw->mac.type) {
2124 case e1000_i350:
2125 igb_set_eee_i350(hw);
2126 break;
2127 default:
2128 break;
2129 }
2130 return 0;
2131
2132 err_register:
2133 igb_release_hw_control(adapter);
2134 err_eeprom:
2135 if (!igb_check_reset_block(hw))
2136 igb_reset_phy(hw);
2137
2138 if (hw->flash_address)
2139 iounmap(hw->flash_address);
2140 err_sw_init:
2141 igb_clear_interrupt_scheme(adapter);
2142 iounmap(hw->hw_addr);
2143 err_ioremap:
2144 free_netdev(netdev);
2145 err_alloc_etherdev:
2146 pci_release_selected_regions(pdev,
2147 pci_select_bars(pdev, IORESOURCE_MEM));
2148 err_pci_reg:
2149 err_dma:
2150 pci_disable_device(pdev);
2151 return err;
2152 }
2153
2154 /**
2155 * igb_remove - Device Removal Routine
2156 * @pdev: PCI device information struct
2157 *
2158 * igb_remove is called by the PCI subsystem to alert the driver
2159 * that it should release a PCI device. The could be caused by a
2160 * Hot-Plug event, or because the driver is going to be removed from
2161 * memory.
2162 **/
2163 static void __devexit igb_remove(struct pci_dev *pdev)
2164 {
2165 struct net_device *netdev = pci_get_drvdata(pdev);
2166 struct igb_adapter *adapter = netdev_priv(netdev);
2167 struct e1000_hw *hw = &adapter->hw;
2168
2169 /*
2170 * The watchdog timer may be rescheduled, so explicitly
2171 * disable watchdog from being rescheduled.
2172 */
2173 set_bit(__IGB_DOWN, &adapter->state);
2174 del_timer_sync(&adapter->watchdog_timer);
2175 del_timer_sync(&adapter->phy_info_timer);
2176
2177 cancel_work_sync(&adapter->reset_task);
2178 cancel_work_sync(&adapter->watchdog_task);
2179
2180 #ifdef CONFIG_IGB_DCA
2181 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2182 dev_info(&pdev->dev, "DCA disabled\n");
2183 dca_remove_requester(&pdev->dev);
2184 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2185 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2186 }
2187 #endif
2188
2189 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2190 * would have already happened in close and is redundant. */
2191 igb_release_hw_control(adapter);
2192
2193 unregister_netdev(netdev);
2194
2195 igb_clear_interrupt_scheme(adapter);
2196
2197 #ifdef CONFIG_PCI_IOV
2198 /* reclaim resources allocated to VFs */
2199 if (adapter->vf_data) {
2200 /* disable iov and allow time for transactions to clear */
2201 pci_disable_sriov(pdev);
2202 msleep(500);
2203
2204 kfree(adapter->vf_data);
2205 adapter->vf_data = NULL;
2206 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2207 wrfl();
2208 msleep(100);
2209 dev_info(&pdev->dev, "IOV Disabled\n");
2210 }
2211 #endif
2212
2213 iounmap(hw->hw_addr);
2214 if (hw->flash_address)
2215 iounmap(hw->flash_address);
2216 pci_release_selected_regions(pdev,
2217 pci_select_bars(pdev, IORESOURCE_MEM));
2218
2219 free_netdev(netdev);
2220
2221 pci_disable_pcie_error_reporting(pdev);
2222
2223 pci_disable_device(pdev);
2224 }
2225
2226 /**
2227 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2228 * @adapter: board private structure to initialize
2229 *
2230 * This function initializes the vf specific data storage and then attempts to
2231 * allocate the VFs. The reason for ordering it this way is because it is much
2232 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2233 * the memory for the VFs.
2234 **/
2235 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2236 {
2237 #ifdef CONFIG_PCI_IOV
2238 struct pci_dev *pdev = adapter->pdev;
2239
2240 if (adapter->vfs_allocated_count) {
2241 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2242 sizeof(struct vf_data_storage),
2243 GFP_KERNEL);
2244 /* if allocation failed then we do not support SR-IOV */
2245 if (!adapter->vf_data) {
2246 adapter->vfs_allocated_count = 0;
2247 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2248 "Data Storage\n");
2249 }
2250 }
2251
2252 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2253 kfree(adapter->vf_data);
2254 adapter->vf_data = NULL;
2255 #endif /* CONFIG_PCI_IOV */
2256 adapter->vfs_allocated_count = 0;
2257 #ifdef CONFIG_PCI_IOV
2258 } else {
2259 unsigned char mac_addr[ETH_ALEN];
2260 int i;
2261 dev_info(&pdev->dev, "%d vfs allocated\n",
2262 adapter->vfs_allocated_count);
2263 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2264 random_ether_addr(mac_addr);
2265 igb_set_vf_mac(adapter, i, mac_addr);
2266 }
2267 /* DMA Coalescing is not supported in IOV mode. */
2268 if (adapter->flags & IGB_FLAG_DMAC)
2269 adapter->flags &= ~IGB_FLAG_DMAC;
2270 }
2271 #endif /* CONFIG_PCI_IOV */
2272 }
2273
2274
2275 /**
2276 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2277 * @adapter: board private structure to initialize
2278 *
2279 * igb_init_hw_timer initializes the function pointer and values for the hw
2280 * timer found in hardware.
2281 **/
2282 static void igb_init_hw_timer(struct igb_adapter *adapter)
2283 {
2284 struct e1000_hw *hw = &adapter->hw;
2285
2286 switch (hw->mac.type) {
2287 case e1000_i350:
2288 case e1000_82580:
2289 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2290 adapter->cycles.read = igb_read_clock;
2291 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2292 adapter->cycles.mult = 1;
2293 /*
2294 * The 82580 timesync updates the system timer every 8ns by 8ns
2295 * and the value cannot be shifted. Instead we need to shift
2296 * the registers to generate a 64bit timer value. As a result
2297 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2298 * 24 in order to generate a larger value for synchronization.
2299 */
2300 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2301 /* disable system timer temporarily by setting bit 31 */
2302 wr32(E1000_TSAUXC, 0x80000000);
2303 wrfl();
2304
2305 /* Set registers so that rollover occurs soon to test this. */
2306 wr32(E1000_SYSTIMR, 0x00000000);
2307 wr32(E1000_SYSTIML, 0x80000000);
2308 wr32(E1000_SYSTIMH, 0x000000FF);
2309 wrfl();
2310
2311 /* enable system timer by clearing bit 31 */
2312 wr32(E1000_TSAUXC, 0x0);
2313 wrfl();
2314
2315 timecounter_init(&adapter->clock,
2316 &adapter->cycles,
2317 ktime_to_ns(ktime_get_real()));
2318 /*
2319 * Synchronize our NIC clock against system wall clock. NIC
2320 * time stamp reading requires ~3us per sample, each sample
2321 * was pretty stable even under load => only require 10
2322 * samples for each offset comparison.
2323 */
2324 memset(&adapter->compare, 0, sizeof(adapter->compare));
2325 adapter->compare.source = &adapter->clock;
2326 adapter->compare.target = ktime_get_real;
2327 adapter->compare.num_samples = 10;
2328 timecompare_update(&adapter->compare, 0);
2329 break;
2330 case e1000_82576:
2331 /*
2332 * Initialize hardware timer: we keep it running just in case
2333 * that some program needs it later on.
2334 */
2335 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2336 adapter->cycles.read = igb_read_clock;
2337 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2338 adapter->cycles.mult = 1;
2339 /**
2340 * Scale the NIC clock cycle by a large factor so that
2341 * relatively small clock corrections can be added or
2342 * subtracted at each clock tick. The drawbacks of a large
2343 * factor are a) that the clock register overflows more quickly
2344 * (not such a big deal) and b) that the increment per tick has
2345 * to fit into 24 bits. As a result we need to use a shift of
2346 * 19 so we can fit a value of 16 into the TIMINCA register.
2347 */
2348 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2349 wr32(E1000_TIMINCA,
2350 (1 << E1000_TIMINCA_16NS_SHIFT) |
2351 (16 << IGB_82576_TSYNC_SHIFT));
2352
2353 /* Set registers so that rollover occurs soon to test this. */
2354 wr32(E1000_SYSTIML, 0x00000000);
2355 wr32(E1000_SYSTIMH, 0xFF800000);
2356 wrfl();
2357
2358 timecounter_init(&adapter->clock,
2359 &adapter->cycles,
2360 ktime_to_ns(ktime_get_real()));
2361 /*
2362 * Synchronize our NIC clock against system wall clock. NIC
2363 * time stamp reading requires ~3us per sample, each sample
2364 * was pretty stable even under load => only require 10
2365 * samples for each offset comparison.
2366 */
2367 memset(&adapter->compare, 0, sizeof(adapter->compare));
2368 adapter->compare.source = &adapter->clock;
2369 adapter->compare.target = ktime_get_real;
2370 adapter->compare.num_samples = 10;
2371 timecompare_update(&adapter->compare, 0);
2372 break;
2373 case e1000_82575:
2374 /* 82575 does not support timesync */
2375 default:
2376 break;
2377 }
2378
2379 }
2380
2381 /**
2382 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2383 * @adapter: board private structure to initialize
2384 *
2385 * igb_sw_init initializes the Adapter private data structure.
2386 * Fields are initialized based on PCI device information and
2387 * OS network device settings (MTU size).
2388 **/
2389 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2390 {
2391 struct e1000_hw *hw = &adapter->hw;
2392 struct net_device *netdev = adapter->netdev;
2393 struct pci_dev *pdev = adapter->pdev;
2394
2395 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2396
2397 /* set default ring sizes */
2398 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2399 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2400
2401 /* set default ITR values */
2402 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2403 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2404
2405 /* set default work limits */
2406 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2407
2408 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2409 VLAN_HLEN;
2410 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2411
2412 spin_lock_init(&adapter->stats64_lock);
2413 #ifdef CONFIG_PCI_IOV
2414 switch (hw->mac.type) {
2415 case e1000_82576:
2416 case e1000_i350:
2417 if (max_vfs > 7) {
2418 dev_warn(&pdev->dev,
2419 "Maximum of 7 VFs per PF, using max\n");
2420 adapter->vfs_allocated_count = 7;
2421 } else
2422 adapter->vfs_allocated_count = max_vfs;
2423 break;
2424 default:
2425 break;
2426 }
2427 #endif /* CONFIG_PCI_IOV */
2428 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2429 /* i350 cannot do RSS and SR-IOV at the same time */
2430 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2431 adapter->rss_queues = 1;
2432
2433 /*
2434 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2435 * then we should combine the queues into a queue pair in order to
2436 * conserve interrupts due to limited supply
2437 */
2438 if ((adapter->rss_queues > 4) ||
2439 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2440 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2441
2442 /* This call may decrease the number of queues */
2443 if (igb_init_interrupt_scheme(adapter)) {
2444 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2445 return -ENOMEM;
2446 }
2447
2448 igb_probe_vfs(adapter);
2449
2450 /* Explicitly disable IRQ since the NIC can be in any state. */
2451 igb_irq_disable(adapter);
2452
2453 if (hw->mac.type == e1000_i350)
2454 adapter->flags &= ~IGB_FLAG_DMAC;
2455
2456 set_bit(__IGB_DOWN, &adapter->state);
2457 return 0;
2458 }
2459
2460 /**
2461 * igb_open - Called when a network interface is made active
2462 * @netdev: network interface device structure
2463 *
2464 * Returns 0 on success, negative value on failure
2465 *
2466 * The open entry point is called when a network interface is made
2467 * active by the system (IFF_UP). At this point all resources needed
2468 * for transmit and receive operations are allocated, the interrupt
2469 * handler is registered with the OS, the watchdog timer is started,
2470 * and the stack is notified that the interface is ready.
2471 **/
2472 static int igb_open(struct net_device *netdev)
2473 {
2474 struct igb_adapter *adapter = netdev_priv(netdev);
2475 struct e1000_hw *hw = &adapter->hw;
2476 int err;
2477 int i;
2478
2479 /* disallow open during test */
2480 if (test_bit(__IGB_TESTING, &adapter->state))
2481 return -EBUSY;
2482
2483 netif_carrier_off(netdev);
2484
2485 /* allocate transmit descriptors */
2486 err = igb_setup_all_tx_resources(adapter);
2487 if (err)
2488 goto err_setup_tx;
2489
2490 /* allocate receive descriptors */
2491 err = igb_setup_all_rx_resources(adapter);
2492 if (err)
2493 goto err_setup_rx;
2494
2495 igb_power_up_link(adapter);
2496
2497 /* before we allocate an interrupt, we must be ready to handle it.
2498 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2499 * as soon as we call pci_request_irq, so we have to setup our
2500 * clean_rx handler before we do so. */
2501 igb_configure(adapter);
2502
2503 err = igb_request_irq(adapter);
2504 if (err)
2505 goto err_req_irq;
2506
2507 /* From here on the code is the same as igb_up() */
2508 clear_bit(__IGB_DOWN, &adapter->state);
2509
2510 for (i = 0; i < adapter->num_q_vectors; i++) {
2511 struct igb_q_vector *q_vector = adapter->q_vector[i];
2512 napi_enable(&q_vector->napi);
2513 }
2514
2515 /* Clear any pending interrupts. */
2516 rd32(E1000_ICR);
2517
2518 igb_irq_enable(adapter);
2519
2520 /* notify VFs that reset has been completed */
2521 if (adapter->vfs_allocated_count) {
2522 u32 reg_data = rd32(E1000_CTRL_EXT);
2523 reg_data |= E1000_CTRL_EXT_PFRSTD;
2524 wr32(E1000_CTRL_EXT, reg_data);
2525 }
2526
2527 netif_tx_start_all_queues(netdev);
2528
2529 /* start the watchdog. */
2530 hw->mac.get_link_status = 1;
2531 schedule_work(&adapter->watchdog_task);
2532
2533 return 0;
2534
2535 err_req_irq:
2536 igb_release_hw_control(adapter);
2537 igb_power_down_link(adapter);
2538 igb_free_all_rx_resources(adapter);
2539 err_setup_rx:
2540 igb_free_all_tx_resources(adapter);
2541 err_setup_tx:
2542 igb_reset(adapter);
2543
2544 return err;
2545 }
2546
2547 /**
2548 * igb_close - Disables a network interface
2549 * @netdev: network interface device structure
2550 *
2551 * Returns 0, this is not allowed to fail
2552 *
2553 * The close entry point is called when an interface is de-activated
2554 * by the OS. The hardware is still under the driver's control, but
2555 * needs to be disabled. A global MAC reset is issued to stop the
2556 * hardware, and all transmit and receive resources are freed.
2557 **/
2558 static int igb_close(struct net_device *netdev)
2559 {
2560 struct igb_adapter *adapter = netdev_priv(netdev);
2561
2562 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2563 igb_down(adapter);
2564
2565 igb_free_irq(adapter);
2566
2567 igb_free_all_tx_resources(adapter);
2568 igb_free_all_rx_resources(adapter);
2569
2570 return 0;
2571 }
2572
2573 /**
2574 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2575 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2576 *
2577 * Return 0 on success, negative on failure
2578 **/
2579 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2580 {
2581 struct device *dev = tx_ring->dev;
2582 int size;
2583
2584 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2585 tx_ring->tx_buffer_info = vzalloc(size);
2586 if (!tx_ring->tx_buffer_info)
2587 goto err;
2588
2589 /* round up to nearest 4K */
2590 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2591 tx_ring->size = ALIGN(tx_ring->size, 4096);
2592
2593 tx_ring->desc = dma_alloc_coherent(dev,
2594 tx_ring->size,
2595 &tx_ring->dma,
2596 GFP_KERNEL);
2597
2598 if (!tx_ring->desc)
2599 goto err;
2600
2601 tx_ring->next_to_use = 0;
2602 tx_ring->next_to_clean = 0;
2603 return 0;
2604
2605 err:
2606 vfree(tx_ring->tx_buffer_info);
2607 dev_err(dev,
2608 "Unable to allocate memory for the transmit descriptor ring\n");
2609 return -ENOMEM;
2610 }
2611
2612 /**
2613 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2614 * (Descriptors) for all queues
2615 * @adapter: board private structure
2616 *
2617 * Return 0 on success, negative on failure
2618 **/
2619 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2620 {
2621 struct pci_dev *pdev = adapter->pdev;
2622 int i, err = 0;
2623
2624 for (i = 0; i < adapter->num_tx_queues; i++) {
2625 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2626 if (err) {
2627 dev_err(&pdev->dev,
2628 "Allocation for Tx Queue %u failed\n", i);
2629 for (i--; i >= 0; i--)
2630 igb_free_tx_resources(adapter->tx_ring[i]);
2631 break;
2632 }
2633 }
2634
2635 return err;
2636 }
2637
2638 /**
2639 * igb_setup_tctl - configure the transmit control registers
2640 * @adapter: Board private structure
2641 **/
2642 void igb_setup_tctl(struct igb_adapter *adapter)
2643 {
2644 struct e1000_hw *hw = &adapter->hw;
2645 u32 tctl;
2646
2647 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2648 wr32(E1000_TXDCTL(0), 0);
2649
2650 /* Program the Transmit Control Register */
2651 tctl = rd32(E1000_TCTL);
2652 tctl &= ~E1000_TCTL_CT;
2653 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2654 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2655
2656 igb_config_collision_dist(hw);
2657
2658 /* Enable transmits */
2659 tctl |= E1000_TCTL_EN;
2660
2661 wr32(E1000_TCTL, tctl);
2662 }
2663
2664 /**
2665 * igb_configure_tx_ring - Configure transmit ring after Reset
2666 * @adapter: board private structure
2667 * @ring: tx ring to configure
2668 *
2669 * Configure a transmit ring after a reset.
2670 **/
2671 void igb_configure_tx_ring(struct igb_adapter *adapter,
2672 struct igb_ring *ring)
2673 {
2674 struct e1000_hw *hw = &adapter->hw;
2675 u32 txdctl = 0;
2676 u64 tdba = ring->dma;
2677 int reg_idx = ring->reg_idx;
2678
2679 /* disable the queue */
2680 wr32(E1000_TXDCTL(reg_idx), 0);
2681 wrfl();
2682 mdelay(10);
2683
2684 wr32(E1000_TDLEN(reg_idx),
2685 ring->count * sizeof(union e1000_adv_tx_desc));
2686 wr32(E1000_TDBAL(reg_idx),
2687 tdba & 0x00000000ffffffffULL);
2688 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2689
2690 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2691 wr32(E1000_TDH(reg_idx), 0);
2692 writel(0, ring->tail);
2693
2694 txdctl |= IGB_TX_PTHRESH;
2695 txdctl |= IGB_TX_HTHRESH << 8;
2696 txdctl |= IGB_TX_WTHRESH << 16;
2697
2698 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2699 wr32(E1000_TXDCTL(reg_idx), txdctl);
2700 }
2701
2702 /**
2703 * igb_configure_tx - Configure transmit Unit after Reset
2704 * @adapter: board private structure
2705 *
2706 * Configure the Tx unit of the MAC after a reset.
2707 **/
2708 static void igb_configure_tx(struct igb_adapter *adapter)
2709 {
2710 int i;
2711
2712 for (i = 0; i < adapter->num_tx_queues; i++)
2713 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2714 }
2715
2716 /**
2717 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2718 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2719 *
2720 * Returns 0 on success, negative on failure
2721 **/
2722 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2723 {
2724 struct device *dev = rx_ring->dev;
2725 int size, desc_len;
2726
2727 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2728 rx_ring->rx_buffer_info = vzalloc(size);
2729 if (!rx_ring->rx_buffer_info)
2730 goto err;
2731
2732 desc_len = sizeof(union e1000_adv_rx_desc);
2733
2734 /* Round up to nearest 4K */
2735 rx_ring->size = rx_ring->count * desc_len;
2736 rx_ring->size = ALIGN(rx_ring->size, 4096);
2737
2738 rx_ring->desc = dma_alloc_coherent(dev,
2739 rx_ring->size,
2740 &rx_ring->dma,
2741 GFP_KERNEL);
2742
2743 if (!rx_ring->desc)
2744 goto err;
2745
2746 rx_ring->next_to_clean = 0;
2747 rx_ring->next_to_use = 0;
2748
2749 return 0;
2750
2751 err:
2752 vfree(rx_ring->rx_buffer_info);
2753 rx_ring->rx_buffer_info = NULL;
2754 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2755 " ring\n");
2756 return -ENOMEM;
2757 }
2758
2759 /**
2760 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2761 * (Descriptors) for all queues
2762 * @adapter: board private structure
2763 *
2764 * Return 0 on success, negative on failure
2765 **/
2766 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2767 {
2768 struct pci_dev *pdev = adapter->pdev;
2769 int i, err = 0;
2770
2771 for (i = 0; i < adapter->num_rx_queues; i++) {
2772 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2773 if (err) {
2774 dev_err(&pdev->dev,
2775 "Allocation for Rx Queue %u failed\n", i);
2776 for (i--; i >= 0; i--)
2777 igb_free_rx_resources(adapter->rx_ring[i]);
2778 break;
2779 }
2780 }
2781
2782 return err;
2783 }
2784
2785 /**
2786 * igb_setup_mrqc - configure the multiple receive queue control registers
2787 * @adapter: Board private structure
2788 **/
2789 static void igb_setup_mrqc(struct igb_adapter *adapter)
2790 {
2791 struct e1000_hw *hw = &adapter->hw;
2792 u32 mrqc, rxcsum;
2793 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2794 union e1000_reta {
2795 u32 dword;
2796 u8 bytes[4];
2797 } reta;
2798 static const u8 rsshash[40] = {
2799 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2800 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2801 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2802 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2803
2804 /* Fill out hash function seeds */
2805 for (j = 0; j < 10; j++) {
2806 u32 rsskey = rsshash[(j * 4)];
2807 rsskey |= rsshash[(j * 4) + 1] << 8;
2808 rsskey |= rsshash[(j * 4) + 2] << 16;
2809 rsskey |= rsshash[(j * 4) + 3] << 24;
2810 array_wr32(E1000_RSSRK(0), j, rsskey);
2811 }
2812
2813 num_rx_queues = adapter->rss_queues;
2814
2815 if (adapter->vfs_allocated_count) {
2816 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2817 switch (hw->mac.type) {
2818 case e1000_i350:
2819 case e1000_82580:
2820 num_rx_queues = 1;
2821 shift = 0;
2822 break;
2823 case e1000_82576:
2824 shift = 3;
2825 num_rx_queues = 2;
2826 break;
2827 case e1000_82575:
2828 shift = 2;
2829 shift2 = 6;
2830 default:
2831 break;
2832 }
2833 } else {
2834 if (hw->mac.type == e1000_82575)
2835 shift = 6;
2836 }
2837
2838 for (j = 0; j < (32 * 4); j++) {
2839 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2840 if (shift2)
2841 reta.bytes[j & 3] |= num_rx_queues << shift2;
2842 if ((j & 3) == 3)
2843 wr32(E1000_RETA(j >> 2), reta.dword);
2844 }
2845
2846 /*
2847 * Disable raw packet checksumming so that RSS hash is placed in
2848 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2849 * offloads as they are enabled by default
2850 */
2851 rxcsum = rd32(E1000_RXCSUM);
2852 rxcsum |= E1000_RXCSUM_PCSD;
2853
2854 if (adapter->hw.mac.type >= e1000_82576)
2855 /* Enable Receive Checksum Offload for SCTP */
2856 rxcsum |= E1000_RXCSUM_CRCOFL;
2857
2858 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2859 wr32(E1000_RXCSUM, rxcsum);
2860
2861 /* If VMDq is enabled then we set the appropriate mode for that, else
2862 * we default to RSS so that an RSS hash is calculated per packet even
2863 * if we are only using one queue */
2864 if (adapter->vfs_allocated_count) {
2865 if (hw->mac.type > e1000_82575) {
2866 /* Set the default pool for the PF's first queue */
2867 u32 vtctl = rd32(E1000_VT_CTL);
2868 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2869 E1000_VT_CTL_DISABLE_DEF_POOL);
2870 vtctl |= adapter->vfs_allocated_count <<
2871 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2872 wr32(E1000_VT_CTL, vtctl);
2873 }
2874 if (adapter->rss_queues > 1)
2875 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2876 else
2877 mrqc = E1000_MRQC_ENABLE_VMDQ;
2878 } else {
2879 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2880 }
2881 igb_vmm_control(adapter);
2882
2883 /*
2884 * Generate RSS hash based on TCP port numbers and/or
2885 * IPv4/v6 src and dst addresses since UDP cannot be
2886 * hashed reliably due to IP fragmentation
2887 */
2888 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2889 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2890 E1000_MRQC_RSS_FIELD_IPV6 |
2891 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2892 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2893
2894 wr32(E1000_MRQC, mrqc);
2895 }
2896
2897 /**
2898 * igb_setup_rctl - configure the receive control registers
2899 * @adapter: Board private structure
2900 **/
2901 void igb_setup_rctl(struct igb_adapter *adapter)
2902 {
2903 struct e1000_hw *hw = &adapter->hw;
2904 u32 rctl;
2905
2906 rctl = rd32(E1000_RCTL);
2907
2908 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2909 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2910
2911 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2912 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2913
2914 /*
2915 * enable stripping of CRC. It's unlikely this will break BMC
2916 * redirection as it did with e1000. Newer features require
2917 * that the HW strips the CRC.
2918 */
2919 rctl |= E1000_RCTL_SECRC;
2920
2921 /* disable store bad packets and clear size bits. */
2922 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2923
2924 /* enable LPE to prevent packets larger than max_frame_size */
2925 rctl |= E1000_RCTL_LPE;
2926
2927 /* disable queue 0 to prevent tail write w/o re-config */
2928 wr32(E1000_RXDCTL(0), 0);
2929
2930 /* Attention!!! For SR-IOV PF driver operations you must enable
2931 * queue drop for all VF and PF queues to prevent head of line blocking
2932 * if an un-trusted VF does not provide descriptors to hardware.
2933 */
2934 if (adapter->vfs_allocated_count) {
2935 /* set all queue drop enable bits */
2936 wr32(E1000_QDE, ALL_QUEUES);
2937 }
2938
2939 wr32(E1000_RCTL, rctl);
2940 }
2941
2942 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2943 int vfn)
2944 {
2945 struct e1000_hw *hw = &adapter->hw;
2946 u32 vmolr;
2947
2948 /* if it isn't the PF check to see if VFs are enabled and
2949 * increase the size to support vlan tags */
2950 if (vfn < adapter->vfs_allocated_count &&
2951 adapter->vf_data[vfn].vlans_enabled)
2952 size += VLAN_TAG_SIZE;
2953
2954 vmolr = rd32(E1000_VMOLR(vfn));
2955 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2956 vmolr |= size | E1000_VMOLR_LPE;
2957 wr32(E1000_VMOLR(vfn), vmolr);
2958
2959 return 0;
2960 }
2961
2962 /**
2963 * igb_rlpml_set - set maximum receive packet size
2964 * @adapter: board private structure
2965 *
2966 * Configure maximum receivable packet size.
2967 **/
2968 static void igb_rlpml_set(struct igb_adapter *adapter)
2969 {
2970 u32 max_frame_size = adapter->max_frame_size;
2971 struct e1000_hw *hw = &adapter->hw;
2972 u16 pf_id = adapter->vfs_allocated_count;
2973
2974 if (pf_id) {
2975 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2976 /*
2977 * If we're in VMDQ or SR-IOV mode, then set global RLPML
2978 * to our max jumbo frame size, in case we need to enable
2979 * jumbo frames on one of the rings later.
2980 * This will not pass over-length frames into the default
2981 * queue because it's gated by the VMOLR.RLPML.
2982 */
2983 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2984 }
2985
2986 wr32(E1000_RLPML, max_frame_size);
2987 }
2988
2989 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2990 int vfn, bool aupe)
2991 {
2992 struct e1000_hw *hw = &adapter->hw;
2993 u32 vmolr;
2994
2995 /*
2996 * This register exists only on 82576 and newer so if we are older then
2997 * we should exit and do nothing
2998 */
2999 if (hw->mac.type < e1000_82576)
3000 return;
3001
3002 vmolr = rd32(E1000_VMOLR(vfn));
3003 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3004 if (aupe)
3005 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3006 else
3007 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3008
3009 /* clear all bits that might not be set */
3010 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3011
3012 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3013 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3014 /*
3015 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3016 * multicast packets
3017 */
3018 if (vfn <= adapter->vfs_allocated_count)
3019 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3020
3021 wr32(E1000_VMOLR(vfn), vmolr);
3022 }
3023
3024 /**
3025 * igb_configure_rx_ring - Configure a receive ring after Reset
3026 * @adapter: board private structure
3027 * @ring: receive ring to be configured
3028 *
3029 * Configure the Rx unit of the MAC after a reset.
3030 **/
3031 void igb_configure_rx_ring(struct igb_adapter *adapter,
3032 struct igb_ring *ring)
3033 {
3034 struct e1000_hw *hw = &adapter->hw;
3035 u64 rdba = ring->dma;
3036 int reg_idx = ring->reg_idx;
3037 u32 srrctl = 0, rxdctl = 0;
3038
3039 /* disable the queue */
3040 wr32(E1000_RXDCTL(reg_idx), 0);
3041
3042 /* Set DMA base address registers */
3043 wr32(E1000_RDBAL(reg_idx),
3044 rdba & 0x00000000ffffffffULL);
3045 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3046 wr32(E1000_RDLEN(reg_idx),
3047 ring->count * sizeof(union e1000_adv_rx_desc));
3048
3049 /* initialize head and tail */
3050 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3051 wr32(E1000_RDH(reg_idx), 0);
3052 writel(0, ring->tail);
3053
3054 /* set descriptor configuration */
3055 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3056 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3057 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3058 #else
3059 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3060 #endif
3061 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3062 if (hw->mac.type == e1000_82580)
3063 srrctl |= E1000_SRRCTL_TIMESTAMP;
3064 /* Only set Drop Enable if we are supporting multiple queues */
3065 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3066 srrctl |= E1000_SRRCTL_DROP_EN;
3067
3068 wr32(E1000_SRRCTL(reg_idx), srrctl);
3069
3070 /* set filtering for VMDQ pools */
3071 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3072
3073 rxdctl |= IGB_RX_PTHRESH;
3074 rxdctl |= IGB_RX_HTHRESH << 8;
3075 rxdctl |= IGB_RX_WTHRESH << 16;
3076
3077 /* enable receive descriptor fetching */
3078 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3079 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3080 }
3081
3082 /**
3083 * igb_configure_rx - Configure receive Unit after Reset
3084 * @adapter: board private structure
3085 *
3086 * Configure the Rx unit of the MAC after a reset.
3087 **/
3088 static void igb_configure_rx(struct igb_adapter *adapter)
3089 {
3090 int i;
3091
3092 /* set UTA to appropriate mode */
3093 igb_set_uta(adapter);
3094
3095 /* set the correct pool for the PF default MAC address in entry 0 */
3096 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3097 adapter->vfs_allocated_count);
3098
3099 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3100 * the Base and Length of the Rx Descriptor Ring */
3101 for (i = 0; i < adapter->num_rx_queues; i++)
3102 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3103 }
3104
3105 /**
3106 * igb_free_tx_resources - Free Tx Resources per Queue
3107 * @tx_ring: Tx descriptor ring for a specific queue
3108 *
3109 * Free all transmit software resources
3110 **/
3111 void igb_free_tx_resources(struct igb_ring *tx_ring)
3112 {
3113 igb_clean_tx_ring(tx_ring);
3114
3115 vfree(tx_ring->tx_buffer_info);
3116 tx_ring->tx_buffer_info = NULL;
3117
3118 /* if not set, then don't free */
3119 if (!tx_ring->desc)
3120 return;
3121
3122 dma_free_coherent(tx_ring->dev, tx_ring->size,
3123 tx_ring->desc, tx_ring->dma);
3124
3125 tx_ring->desc = NULL;
3126 }
3127
3128 /**
3129 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3130 * @adapter: board private structure
3131 *
3132 * Free all transmit software resources
3133 **/
3134 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3135 {
3136 int i;
3137
3138 for (i = 0; i < adapter->num_tx_queues; i++)
3139 igb_free_tx_resources(adapter->tx_ring[i]);
3140 }
3141
3142 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3143 struct igb_tx_buffer *buffer_info)
3144 {
3145 if (buffer_info->dma) {
3146 if (buffer_info->tx_flags & IGB_TX_FLAGS_MAPPED_AS_PAGE)
3147 dma_unmap_page(tx_ring->dev,
3148 buffer_info->dma,
3149 buffer_info->length,
3150 DMA_TO_DEVICE);
3151 else
3152 dma_unmap_single(tx_ring->dev,
3153 buffer_info->dma,
3154 buffer_info->length,
3155 DMA_TO_DEVICE);
3156 buffer_info->dma = 0;
3157 }
3158 if (buffer_info->skb) {
3159 dev_kfree_skb_any(buffer_info->skb);
3160 buffer_info->skb = NULL;
3161 }
3162 buffer_info->time_stamp = 0;
3163 buffer_info->length = 0;
3164 buffer_info->next_to_watch = NULL;
3165 }
3166
3167 /**
3168 * igb_clean_tx_ring - Free Tx Buffers
3169 * @tx_ring: ring to be cleaned
3170 **/
3171 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3172 {
3173 struct igb_tx_buffer *buffer_info;
3174 unsigned long size;
3175 unsigned int i;
3176
3177 if (!tx_ring->tx_buffer_info)
3178 return;
3179 /* Free all the Tx ring sk_buffs */
3180
3181 for (i = 0; i < tx_ring->count; i++) {
3182 buffer_info = &tx_ring->tx_buffer_info[i];
3183 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3184 }
3185
3186 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3187 memset(tx_ring->tx_buffer_info, 0, size);
3188
3189 /* Zero out the descriptor ring */
3190 memset(tx_ring->desc, 0, tx_ring->size);
3191
3192 tx_ring->next_to_use = 0;
3193 tx_ring->next_to_clean = 0;
3194 }
3195
3196 /**
3197 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3198 * @adapter: board private structure
3199 **/
3200 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3201 {
3202 int i;
3203
3204 for (i = 0; i < adapter->num_tx_queues; i++)
3205 igb_clean_tx_ring(adapter->tx_ring[i]);
3206 }
3207
3208 /**
3209 * igb_free_rx_resources - Free Rx Resources
3210 * @rx_ring: ring to clean the resources from
3211 *
3212 * Free all receive software resources
3213 **/
3214 void igb_free_rx_resources(struct igb_ring *rx_ring)
3215 {
3216 igb_clean_rx_ring(rx_ring);
3217
3218 vfree(rx_ring->rx_buffer_info);
3219 rx_ring->rx_buffer_info = NULL;
3220
3221 /* if not set, then don't free */
3222 if (!rx_ring->desc)
3223 return;
3224
3225 dma_free_coherent(rx_ring->dev, rx_ring->size,
3226 rx_ring->desc, rx_ring->dma);
3227
3228 rx_ring->desc = NULL;
3229 }
3230
3231 /**
3232 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3233 * @adapter: board private structure
3234 *
3235 * Free all receive software resources
3236 **/
3237 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3238 {
3239 int i;
3240
3241 for (i = 0; i < adapter->num_rx_queues; i++)
3242 igb_free_rx_resources(adapter->rx_ring[i]);
3243 }
3244
3245 /**
3246 * igb_clean_rx_ring - Free Rx Buffers per Queue
3247 * @rx_ring: ring to free buffers from
3248 **/
3249 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3250 {
3251 unsigned long size;
3252 u16 i;
3253
3254 if (!rx_ring->rx_buffer_info)
3255 return;
3256
3257 /* Free all the Rx ring sk_buffs */
3258 for (i = 0; i < rx_ring->count; i++) {
3259 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3260 if (buffer_info->dma) {
3261 dma_unmap_single(rx_ring->dev,
3262 buffer_info->dma,
3263 IGB_RX_HDR_LEN,
3264 DMA_FROM_DEVICE);
3265 buffer_info->dma = 0;
3266 }
3267
3268 if (buffer_info->skb) {
3269 dev_kfree_skb(buffer_info->skb);
3270 buffer_info->skb = NULL;
3271 }
3272 if (buffer_info->page_dma) {
3273 dma_unmap_page(rx_ring->dev,
3274 buffer_info->page_dma,
3275 PAGE_SIZE / 2,
3276 DMA_FROM_DEVICE);
3277 buffer_info->page_dma = 0;
3278 }
3279 if (buffer_info->page) {
3280 put_page(buffer_info->page);
3281 buffer_info->page = NULL;
3282 buffer_info->page_offset = 0;
3283 }
3284 }
3285
3286 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3287 memset(rx_ring->rx_buffer_info, 0, size);
3288
3289 /* Zero out the descriptor ring */
3290 memset(rx_ring->desc, 0, rx_ring->size);
3291
3292 rx_ring->next_to_clean = 0;
3293 rx_ring->next_to_use = 0;
3294 }
3295
3296 /**
3297 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3298 * @adapter: board private structure
3299 **/
3300 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3301 {
3302 int i;
3303
3304 for (i = 0; i < adapter->num_rx_queues; i++)
3305 igb_clean_rx_ring(adapter->rx_ring[i]);
3306 }
3307
3308 /**
3309 * igb_set_mac - Change the Ethernet Address of the NIC
3310 * @netdev: network interface device structure
3311 * @p: pointer to an address structure
3312 *
3313 * Returns 0 on success, negative on failure
3314 **/
3315 static int igb_set_mac(struct net_device *netdev, void *p)
3316 {
3317 struct igb_adapter *adapter = netdev_priv(netdev);
3318 struct e1000_hw *hw = &adapter->hw;
3319 struct sockaddr *addr = p;
3320
3321 if (!is_valid_ether_addr(addr->sa_data))
3322 return -EADDRNOTAVAIL;
3323
3324 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3325 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3326
3327 /* set the correct pool for the new PF MAC address in entry 0 */
3328 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3329 adapter->vfs_allocated_count);
3330
3331 return 0;
3332 }
3333
3334 /**
3335 * igb_write_mc_addr_list - write multicast addresses to MTA
3336 * @netdev: network interface device structure
3337 *
3338 * Writes multicast address list to the MTA hash table.
3339 * Returns: -ENOMEM on failure
3340 * 0 on no addresses written
3341 * X on writing X addresses to MTA
3342 **/
3343 static int igb_write_mc_addr_list(struct net_device *netdev)
3344 {
3345 struct igb_adapter *adapter = netdev_priv(netdev);
3346 struct e1000_hw *hw = &adapter->hw;
3347 struct netdev_hw_addr *ha;
3348 u8 *mta_list;
3349 int i;
3350
3351 if (netdev_mc_empty(netdev)) {
3352 /* nothing to program, so clear mc list */
3353 igb_update_mc_addr_list(hw, NULL, 0);
3354 igb_restore_vf_multicasts(adapter);
3355 return 0;
3356 }
3357
3358 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3359 if (!mta_list)
3360 return -ENOMEM;
3361
3362 /* The shared function expects a packed array of only addresses. */
3363 i = 0;
3364 netdev_for_each_mc_addr(ha, netdev)
3365 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3366
3367 igb_update_mc_addr_list(hw, mta_list, i);
3368 kfree(mta_list);
3369
3370 return netdev_mc_count(netdev);
3371 }
3372
3373 /**
3374 * igb_write_uc_addr_list - write unicast addresses to RAR table
3375 * @netdev: network interface device structure
3376 *
3377 * Writes unicast address list to the RAR table.
3378 * Returns: -ENOMEM on failure/insufficient address space
3379 * 0 on no addresses written
3380 * X on writing X addresses to the RAR table
3381 **/
3382 static int igb_write_uc_addr_list(struct net_device *netdev)
3383 {
3384 struct igb_adapter *adapter = netdev_priv(netdev);
3385 struct e1000_hw *hw = &adapter->hw;
3386 unsigned int vfn = adapter->vfs_allocated_count;
3387 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3388 int count = 0;
3389
3390 /* return ENOMEM indicating insufficient memory for addresses */
3391 if (netdev_uc_count(netdev) > rar_entries)
3392 return -ENOMEM;
3393
3394 if (!netdev_uc_empty(netdev) && rar_entries) {
3395 struct netdev_hw_addr *ha;
3396
3397 netdev_for_each_uc_addr(ha, netdev) {
3398 if (!rar_entries)
3399 break;
3400 igb_rar_set_qsel(adapter, ha->addr,
3401 rar_entries--,
3402 vfn);
3403 count++;
3404 }
3405 }
3406 /* write the addresses in reverse order to avoid write combining */
3407 for (; rar_entries > 0 ; rar_entries--) {
3408 wr32(E1000_RAH(rar_entries), 0);
3409 wr32(E1000_RAL(rar_entries), 0);
3410 }
3411 wrfl();
3412
3413 return count;
3414 }
3415
3416 /**
3417 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3418 * @netdev: network interface device structure
3419 *
3420 * The set_rx_mode entry point is called whenever the unicast or multicast
3421 * address lists or the network interface flags are updated. This routine is
3422 * responsible for configuring the hardware for proper unicast, multicast,
3423 * promiscuous mode, and all-multi behavior.
3424 **/
3425 static void igb_set_rx_mode(struct net_device *netdev)
3426 {
3427 struct igb_adapter *adapter = netdev_priv(netdev);
3428 struct e1000_hw *hw = &adapter->hw;
3429 unsigned int vfn = adapter->vfs_allocated_count;
3430 u32 rctl, vmolr = 0;
3431 int count;
3432
3433 /* Check for Promiscuous and All Multicast modes */
3434 rctl = rd32(E1000_RCTL);
3435
3436 /* clear the effected bits */
3437 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3438
3439 if (netdev->flags & IFF_PROMISC) {
3440 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3441 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3442 } else {
3443 if (netdev->flags & IFF_ALLMULTI) {
3444 rctl |= E1000_RCTL_MPE;
3445 vmolr |= E1000_VMOLR_MPME;
3446 } else {
3447 /*
3448 * Write addresses to the MTA, if the attempt fails
3449 * then we should just turn on promiscuous mode so
3450 * that we can at least receive multicast traffic
3451 */
3452 count = igb_write_mc_addr_list(netdev);
3453 if (count < 0) {
3454 rctl |= E1000_RCTL_MPE;
3455 vmolr |= E1000_VMOLR_MPME;
3456 } else if (count) {
3457 vmolr |= E1000_VMOLR_ROMPE;
3458 }
3459 }
3460 /*
3461 * Write addresses to available RAR registers, if there is not
3462 * sufficient space to store all the addresses then enable
3463 * unicast promiscuous mode
3464 */
3465 count = igb_write_uc_addr_list(netdev);
3466 if (count < 0) {
3467 rctl |= E1000_RCTL_UPE;
3468 vmolr |= E1000_VMOLR_ROPE;
3469 }
3470 rctl |= E1000_RCTL_VFE;
3471 }
3472 wr32(E1000_RCTL, rctl);
3473
3474 /*
3475 * In order to support SR-IOV and eventually VMDq it is necessary to set
3476 * the VMOLR to enable the appropriate modes. Without this workaround
3477 * we will have issues with VLAN tag stripping not being done for frames
3478 * that are only arriving because we are the default pool
3479 */
3480 if (hw->mac.type < e1000_82576)
3481 return;
3482
3483 vmolr |= rd32(E1000_VMOLR(vfn)) &
3484 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3485 wr32(E1000_VMOLR(vfn), vmolr);
3486 igb_restore_vf_multicasts(adapter);
3487 }
3488
3489 static void igb_check_wvbr(struct igb_adapter *adapter)
3490 {
3491 struct e1000_hw *hw = &adapter->hw;
3492 u32 wvbr = 0;
3493
3494 switch (hw->mac.type) {
3495 case e1000_82576:
3496 case e1000_i350:
3497 if (!(wvbr = rd32(E1000_WVBR)))
3498 return;
3499 break;
3500 default:
3501 break;
3502 }
3503
3504 adapter->wvbr |= wvbr;
3505 }
3506
3507 #define IGB_STAGGERED_QUEUE_OFFSET 8
3508
3509 static void igb_spoof_check(struct igb_adapter *adapter)
3510 {
3511 int j;
3512
3513 if (!adapter->wvbr)
3514 return;
3515
3516 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3517 if (adapter->wvbr & (1 << j) ||
3518 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3519 dev_warn(&adapter->pdev->dev,
3520 "Spoof event(s) detected on VF %d\n", j);
3521 adapter->wvbr &=
3522 ~((1 << j) |
3523 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3524 }
3525 }
3526 }
3527
3528 /* Need to wait a few seconds after link up to get diagnostic information from
3529 * the phy */
3530 static void igb_update_phy_info(unsigned long data)
3531 {
3532 struct igb_adapter *adapter = (struct igb_adapter *) data;
3533 igb_get_phy_info(&adapter->hw);
3534 }
3535
3536 /**
3537 * igb_has_link - check shared code for link and determine up/down
3538 * @adapter: pointer to driver private info
3539 **/
3540 bool igb_has_link(struct igb_adapter *adapter)
3541 {
3542 struct e1000_hw *hw = &adapter->hw;
3543 bool link_active = false;
3544 s32 ret_val = 0;
3545
3546 /* get_link_status is set on LSC (link status) interrupt or
3547 * rx sequence error interrupt. get_link_status will stay
3548 * false until the e1000_check_for_link establishes link
3549 * for copper adapters ONLY
3550 */
3551 switch (hw->phy.media_type) {
3552 case e1000_media_type_copper:
3553 if (hw->mac.get_link_status) {
3554 ret_val = hw->mac.ops.check_for_link(hw);
3555 link_active = !hw->mac.get_link_status;
3556 } else {
3557 link_active = true;
3558 }
3559 break;
3560 case e1000_media_type_internal_serdes:
3561 ret_val = hw->mac.ops.check_for_link(hw);
3562 link_active = hw->mac.serdes_has_link;
3563 break;
3564 default:
3565 case e1000_media_type_unknown:
3566 break;
3567 }
3568
3569 return link_active;
3570 }
3571
3572 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3573 {
3574 bool ret = false;
3575 u32 ctrl_ext, thstat;
3576
3577 /* check for thermal sensor event on i350, copper only */
3578 if (hw->mac.type == e1000_i350) {
3579 thstat = rd32(E1000_THSTAT);
3580 ctrl_ext = rd32(E1000_CTRL_EXT);
3581
3582 if ((hw->phy.media_type == e1000_media_type_copper) &&
3583 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3584 ret = !!(thstat & event);
3585 }
3586 }
3587
3588 return ret;
3589 }
3590
3591 /**
3592 * igb_watchdog - Timer Call-back
3593 * @data: pointer to adapter cast into an unsigned long
3594 **/
3595 static void igb_watchdog(unsigned long data)
3596 {
3597 struct igb_adapter *adapter = (struct igb_adapter *)data;
3598 /* Do the rest outside of interrupt context */
3599 schedule_work(&adapter->watchdog_task);
3600 }
3601
3602 static void igb_watchdog_task(struct work_struct *work)
3603 {
3604 struct igb_adapter *adapter = container_of(work,
3605 struct igb_adapter,
3606 watchdog_task);
3607 struct e1000_hw *hw = &adapter->hw;
3608 struct net_device *netdev = adapter->netdev;
3609 u32 link;
3610 int i;
3611
3612 link = igb_has_link(adapter);
3613 if (link) {
3614 if (!netif_carrier_ok(netdev)) {
3615 u32 ctrl;
3616 hw->mac.ops.get_speed_and_duplex(hw,
3617 &adapter->link_speed,
3618 &adapter->link_duplex);
3619
3620 ctrl = rd32(E1000_CTRL);
3621 /* Links status message must follow this format */
3622 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3623 "Flow Control: %s\n",
3624 netdev->name,
3625 adapter->link_speed,
3626 adapter->link_duplex == FULL_DUPLEX ?
3627 "Full Duplex" : "Half Duplex",
3628 ((ctrl & E1000_CTRL_TFCE) &&
3629 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3630 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3631 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3632
3633 /* check for thermal sensor event */
3634 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3635 printk(KERN_INFO "igb: %s The network adapter "
3636 "link speed was downshifted "
3637 "because it overheated.\n",
3638 netdev->name);
3639 }
3640
3641 /* adjust timeout factor according to speed/duplex */
3642 adapter->tx_timeout_factor = 1;
3643 switch (adapter->link_speed) {
3644 case SPEED_10:
3645 adapter->tx_timeout_factor = 14;
3646 break;
3647 case SPEED_100:
3648 /* maybe add some timeout factor ? */
3649 break;
3650 }
3651
3652 netif_carrier_on(netdev);
3653
3654 igb_ping_all_vfs(adapter);
3655 igb_check_vf_rate_limit(adapter);
3656
3657 /* link state has changed, schedule phy info update */
3658 if (!test_bit(__IGB_DOWN, &adapter->state))
3659 mod_timer(&adapter->phy_info_timer,
3660 round_jiffies(jiffies + 2 * HZ));
3661 }
3662 } else {
3663 if (netif_carrier_ok(netdev)) {
3664 adapter->link_speed = 0;
3665 adapter->link_duplex = 0;
3666
3667 /* check for thermal sensor event */
3668 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3669 printk(KERN_ERR "igb: %s The network adapter "
3670 "was stopped because it "
3671 "overheated.\n",
3672 netdev->name);
3673 }
3674
3675 /* Links status message must follow this format */
3676 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3677 netdev->name);
3678 netif_carrier_off(netdev);
3679
3680 igb_ping_all_vfs(adapter);
3681
3682 /* link state has changed, schedule phy info update */
3683 if (!test_bit(__IGB_DOWN, &adapter->state))
3684 mod_timer(&adapter->phy_info_timer,
3685 round_jiffies(jiffies + 2 * HZ));
3686 }
3687 }
3688
3689 spin_lock(&adapter->stats64_lock);
3690 igb_update_stats(adapter, &adapter->stats64);
3691 spin_unlock(&adapter->stats64_lock);
3692
3693 for (i = 0; i < adapter->num_tx_queues; i++) {
3694 struct igb_ring *tx_ring = adapter->tx_ring[i];
3695 if (!netif_carrier_ok(netdev)) {
3696 /* We've lost link, so the controller stops DMA,
3697 * but we've got queued Tx work that's never going
3698 * to get done, so reset controller to flush Tx.
3699 * (Do the reset outside of interrupt context). */
3700 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3701 adapter->tx_timeout_count++;
3702 schedule_work(&adapter->reset_task);
3703 /* return immediately since reset is imminent */
3704 return;
3705 }
3706 }
3707
3708 /* Force detection of hung controller every watchdog period */
3709 tx_ring->detect_tx_hung = true;
3710 }
3711
3712 /* Cause software interrupt to ensure rx ring is cleaned */
3713 if (adapter->msix_entries) {
3714 u32 eics = 0;
3715 for (i = 0; i < adapter->num_q_vectors; i++) {
3716 struct igb_q_vector *q_vector = adapter->q_vector[i];
3717 eics |= q_vector->eims_value;
3718 }
3719 wr32(E1000_EICS, eics);
3720 } else {
3721 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3722 }
3723
3724 igb_spoof_check(adapter);
3725
3726 /* Reset the timer */
3727 if (!test_bit(__IGB_DOWN, &adapter->state))
3728 mod_timer(&adapter->watchdog_timer,
3729 round_jiffies(jiffies + 2 * HZ));
3730 }
3731
/* latency classes used by the dynamic interrupt throttling code */
enum latency_range {
	lowest_latency = 0,
	low_latency,		/* 1 */
	bulk_latency,		/* 2 */
	latency_invalid = 255
};
3738
3739 /**
3740 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3741 *
3742 * Stores a new ITR value based on strictly on packet size. This
3743 * algorithm is less sophisticated than that used in igb_update_itr,
3744 * due to the difficulty of synchronizing statistics across multiple
3745 * receive rings. The divisors and thresholds used by this function
3746 * were determined based on theoretical maximum wire speed and testing
3747 * data, in order to minimize response time while increasing bulk
3748 * throughput.
3749 * This functionality is controlled by the InterruptThrottleRate module
3750 * parameter (see igb_param.c)
3751 * NOTE: This function is called only when operating in a multiqueue
3752 * receive environment.
3753 * @q_vector: pointer to q_vector
3754 **/
3755 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3756 {
3757 int new_val = q_vector->itr_val;
3758 int avg_wire_size = 0;
3759 struct igb_adapter *adapter = q_vector->adapter;
3760 struct igb_ring *ring;
3761 unsigned int packets;
3762
3763 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3764 * ints/sec - ITR timer value of 120 ticks.
3765 */
3766 if (adapter->link_speed != SPEED_1000) {
3767 new_val = 976;
3768 goto set_itr_val;
3769 }
3770
3771 ring = q_vector->rx_ring;
3772 if (ring) {
3773 packets = ACCESS_ONCE(ring->total_packets);
3774
3775 if (packets)
3776 avg_wire_size = ring->total_bytes / packets;
3777 }
3778
3779 ring = q_vector->tx_ring;
3780 if (ring) {
3781 packets = ACCESS_ONCE(ring->total_packets);
3782
3783 if (packets)
3784 avg_wire_size = max_t(u32, avg_wire_size,
3785 ring->total_bytes / packets);
3786 }
3787
3788 /* if avg_wire_size isn't set no work was done */
3789 if (!avg_wire_size)
3790 goto clear_counts;
3791
3792 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3793 avg_wire_size += 24;
3794
3795 /* Don't starve jumbo frames */
3796 avg_wire_size = min(avg_wire_size, 3000);
3797
3798 /* Give a little boost to mid-size frames */
3799 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3800 new_val = avg_wire_size / 3;
3801 else
3802 new_val = avg_wire_size / 2;
3803
3804 /* when in itr mode 3 do not exceed 20K ints/sec */
3805 if (adapter->rx_itr_setting == 3 && new_val < 196)
3806 new_val = 196;
3807
3808 set_itr_val:
3809 if (new_val != q_vector->itr_val) {
3810 q_vector->itr_val = new_val;
3811 q_vector->set_itr = 1;
3812 }
3813 clear_counts:
3814 if (q_vector->rx_ring) {
3815 q_vector->rx_ring->total_bytes = 0;
3816 q_vector->rx_ring->total_packets = 0;
3817 }
3818 if (q_vector->tx_ring) {
3819 q_vector->tx_ring->total_bytes = 0;
3820 q_vector->tx_ring->total_packets = 0;
3821 }
3822 }
3823
3824 /**
3825 * igb_update_itr - update the dynamic ITR value based on statistics
3826 * Stores a new ITR value based on packets and byte
3827 * counts during the last interrupt. The advantage of per interrupt
3828 * computation is faster updates and more accurate ITR for the current
3829 * traffic pattern. Constants in this function were computed
3830 * based on theoretical maximum wire speed and thresholds were set based
3831 * on testing data as well as attempting to minimize response time
3832 * while increasing bulk throughput.
3833 * this functionality is controlled by the InterruptThrottleRate module
3834 * parameter (see igb_param.c)
3835 * NOTE: These calculations are only valid when operating in a single-
3836 * queue environment.
3837 * @adapter: pointer to adapter
3838 * @itr_setting: current q_vector->itr_val
3839 * @packets: the number of packets during this measurement interval
3840 * @bytes: the number of bytes during this measurement interval
3841 **/
3842 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3843 int packets, int bytes)
3844 {
3845 unsigned int retval = itr_setting;
3846
3847 if (packets == 0)
3848 goto update_itr_done;
3849
3850 switch (itr_setting) {
3851 case lowest_latency:
3852 /* handle TSO and jumbo frames */
3853 if (bytes/packets > 8000)
3854 retval = bulk_latency;
3855 else if ((packets < 5) && (bytes > 512))
3856 retval = low_latency;
3857 break;
3858 case low_latency: /* 50 usec aka 20000 ints/s */
3859 if (bytes > 10000) {
3860 /* this if handles the TSO accounting */
3861 if (bytes/packets > 8000) {
3862 retval = bulk_latency;
3863 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3864 retval = bulk_latency;
3865 } else if ((packets > 35)) {
3866 retval = lowest_latency;
3867 }
3868 } else if (bytes/packets > 2000) {
3869 retval = bulk_latency;
3870 } else if (packets <= 2 && bytes < 512) {
3871 retval = lowest_latency;
3872 }
3873 break;
3874 case bulk_latency: /* 250 usec aka 4000 ints/s */
3875 if (bytes > 25000) {
3876 if (packets > 35)
3877 retval = low_latency;
3878 } else if (bytes < 1500) {
3879 retval = low_latency;
3880 }
3881 break;
3882 }
3883
3884 update_itr_done:
3885 return retval;
3886 }
3887
3888 static void igb_set_itr(struct igb_adapter *adapter)
3889 {
3890 struct igb_q_vector *q_vector = adapter->q_vector[0];
3891 u16 current_itr;
3892 u32 new_itr = q_vector->itr_val;
3893
3894 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3895 if (adapter->link_speed != SPEED_1000) {
3896 current_itr = 0;
3897 new_itr = 4000;
3898 goto set_itr_now;
3899 }
3900
3901 adapter->rx_itr = igb_update_itr(adapter,
3902 adapter->rx_itr,
3903 q_vector->rx_ring->total_packets,
3904 q_vector->rx_ring->total_bytes);
3905
3906 adapter->tx_itr = igb_update_itr(adapter,
3907 adapter->tx_itr,
3908 q_vector->tx_ring->total_packets,
3909 q_vector->tx_ring->total_bytes);
3910 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3911
3912 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3913 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3914 current_itr = low_latency;
3915
3916 switch (current_itr) {
3917 /* counts and packets in update_itr are dependent on these numbers */
3918 case lowest_latency:
3919 new_itr = 56; /* aka 70,000 ints/sec */
3920 break;
3921 case low_latency:
3922 new_itr = 196; /* aka 20,000 ints/sec */
3923 break;
3924 case bulk_latency:
3925 new_itr = 980; /* aka 4,000 ints/sec */
3926 break;
3927 default:
3928 break;
3929 }
3930
3931 set_itr_now:
3932 q_vector->rx_ring->total_bytes = 0;
3933 q_vector->rx_ring->total_packets = 0;
3934 q_vector->tx_ring->total_bytes = 0;
3935 q_vector->tx_ring->total_packets = 0;
3936
3937 if (new_itr != q_vector->itr_val) {
3938 /* this attempts to bias the interrupt rate towards Bulk
3939 * by adding intermediate steps when interrupt rate is
3940 * increasing */
3941 new_itr = new_itr > q_vector->itr_val ?
3942 max((new_itr * q_vector->itr_val) /
3943 (new_itr + (q_vector->itr_val >> 2)),
3944 new_itr) :
3945 new_itr;
3946 /* Don't write the value here; it resets the adapter's
3947 * internal timer, and causes us to delay far longer than
3948 * we should between interrupts. Instead, we write the ITR
3949 * value at the beginning of the next interrupt so the timing
3950 * ends up being correct.
3951 */
3952 q_vector->itr_val = new_itr;
3953 q_vector->set_itr = 1;
3954 }
3955 }
3956
3957 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3958 u32 type_tucmd, u32 mss_l4len_idx)
3959 {
3960 struct e1000_adv_tx_context_desc *context_desc;
3961 u16 i = tx_ring->next_to_use;
3962
3963 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3964
3965 i++;
3966 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3967
3968 /* set bits to identify this as an advanced context descriptor */
3969 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3970
3971 /* For 82575, context index must be unique per ring. */
3972 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3973 mss_l4len_idx |= tx_ring->reg_idx << 4;
3974
3975 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
3976 context_desc->seqnum_seed = 0;
3977 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
3978 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3979 }
3980
3981 static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
3982 u32 tx_flags, __be16 protocol, u8 *hdr_len)
3983 {
3984 int err;
3985 u32 vlan_macip_lens, type_tucmd;
3986 u32 mss_l4len_idx, l4len;
3987
3988 if (!skb_is_gso(skb))
3989 return 0;
3990
3991 if (skb_header_cloned(skb)) {
3992 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3993 if (err)
3994 return err;
3995 }
3996
3997 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3998 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
3999
4000 if (protocol == __constant_htons(ETH_P_IP)) {
4001 struct iphdr *iph = ip_hdr(skb);
4002 iph->tot_len = 0;
4003 iph->check = 0;
4004 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4005 iph->daddr, 0,
4006 IPPROTO_TCP,
4007 0);
4008 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4009 } else if (skb_is_gso_v6(skb)) {
4010 ipv6_hdr(skb)->payload_len = 0;
4011 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4012 &ipv6_hdr(skb)->daddr,
4013 0, IPPROTO_TCP, 0);
4014 }
4015
4016 l4len = tcp_hdrlen(skb);
4017 *hdr_len = skb_transport_offset(skb) + l4len;
4018
4019 /* MSS L4LEN IDX */
4020 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4021 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4022
4023 /* VLAN MACLEN IPLEN */
4024 vlan_macip_lens = skb_network_header_len(skb);
4025 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4026 vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4027
4028 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4029
4030 return 1;
4031 }
4032
4033 static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
4034 u32 tx_flags, __be16 protocol)
4035 {
4036 u32 vlan_macip_lens = 0;
4037 u32 mss_l4len_idx = 0;
4038 u32 type_tucmd = 0;
4039
4040 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4041 if (!(tx_flags & IGB_TX_FLAGS_VLAN))
4042 return false;
4043 } else {
4044 u8 l4_hdr = 0;
4045 switch (protocol) {
4046 case __constant_htons(ETH_P_IP):
4047 vlan_macip_lens |= skb_network_header_len(skb);
4048 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4049 l4_hdr = ip_hdr(skb)->protocol;
4050 break;
4051 case __constant_htons(ETH_P_IPV6):
4052 vlan_macip_lens |= skb_network_header_len(skb);
4053 l4_hdr = ipv6_hdr(skb)->nexthdr;
4054 break;
4055 default:
4056 if (unlikely(net_ratelimit())) {
4057 dev_warn(tx_ring->dev,
4058 "partial checksum but proto=%x!\n",
4059 protocol);
4060 }
4061 break;
4062 }
4063
4064 switch (l4_hdr) {
4065 case IPPROTO_TCP:
4066 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4067 mss_l4len_idx = tcp_hdrlen(skb) <<
4068 E1000_ADVTXD_L4LEN_SHIFT;
4069 break;
4070 case IPPROTO_SCTP:
4071 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4072 mss_l4len_idx = sizeof(struct sctphdr) <<
4073 E1000_ADVTXD_L4LEN_SHIFT;
4074 break;
4075 case IPPROTO_UDP:
4076 mss_l4len_idx = sizeof(struct udphdr) <<
4077 E1000_ADVTXD_L4LEN_SHIFT;
4078 break;
4079 default:
4080 if (unlikely(net_ratelimit())) {
4081 dev_warn(tx_ring->dev,
4082 "partial checksum but l4 proto=%x!\n",
4083 l4_hdr);
4084 }
4085 break;
4086 }
4087 }
4088
4089 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4090 vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4091
4092 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4093
4094 return (skb->ip_summed == CHECKSUM_PARTIAL);
4095 }
4096
4097 static __le32 igb_tx_cmd_type(u32 tx_flags)
4098 {
4099 /* set type for advanced descriptor with frame checksum insertion */
4100 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4101 E1000_ADVTXD_DCMD_IFCS |
4102 E1000_ADVTXD_DCMD_DEXT);
4103
4104 /* set HW vlan bit if vlan is present */
4105 if (tx_flags & IGB_TX_FLAGS_VLAN)
4106 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4107
4108 /* set timestamp bit if present */
4109 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4110 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4111
4112 /* set segmentation bits for TSO */
4113 if (tx_flags & IGB_TX_FLAGS_TSO)
4114 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4115
4116 return cmd_type;
4117 }
4118
4119 static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
4120 struct igb_ring *tx_ring)
4121 {
4122 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4123
4124 /* 82575 requires a unique index per ring if any offload is enabled */
4125 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4126 (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX))
4127 olinfo_status |= tx_ring->reg_idx << 4;
4128
4129 /* insert L4 checksum */
4130 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4131 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4132
4133 /* insert IPv4 checksum */
4134 if (tx_flags & IGB_TX_FLAGS_IPV4)
4135 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4136 }
4137
4138 return cpu_to_le32(olinfo_status);
4139 }
4140
/* upper bound (exclusive) on the bytes one Tx data descriptor may carry */
#define IGB_MAX_TXD_PWR		16
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
4143
4144 static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
4145 struct igb_tx_buffer *first, u32 tx_flags)
4146 {
4147 struct igb_tx_buffer *buffer_info;
4148 struct device *dev = tx_ring->dev;
4149 unsigned int hlen = skb_headlen(skb);
4150 unsigned int count = 0, i;
4151 unsigned int f;
4152 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4153
4154 i = tx_ring->next_to_use;
4155
4156 buffer_info = &tx_ring->tx_buffer_info[i];
4157 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4158 buffer_info->length = hlen;
4159 buffer_info->tx_flags = tx_flags;
4160 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4161 DMA_TO_DEVICE);
4162 if (dma_mapping_error(dev, buffer_info->dma))
4163 goto dma_error;
4164
4165 tx_flags |= IGB_TX_FLAGS_MAPPED_AS_PAGE;
4166
4167 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4168 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4169 unsigned int len = frag->size;
4170
4171 count++;
4172 i++;
4173 if (i == tx_ring->count)
4174 i = 0;
4175
4176 buffer_info = &tx_ring->tx_buffer_info[i];
4177 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4178 buffer_info->length = len;
4179 buffer_info->tx_flags = tx_flags;
4180 buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len,
4181 DMA_TO_DEVICE);
4182 if (dma_mapping_error(dev, buffer_info->dma))
4183 goto dma_error;
4184
4185 }
4186
4187 buffer_info->skb = skb;
4188 /* multiply data chunks by size of headers */
4189 buffer_info->bytecount = ((gso_segs - 1) * hlen) + skb->len;
4190 buffer_info->gso_segs = gso_segs;
4191
4192 /* set the timestamp */
4193 first->time_stamp = jiffies;
4194
4195 /* set next_to_watch value indicating a packet is present */
4196 first->next_to_watch = IGB_TX_DESC(tx_ring, i);
4197
4198 return ++count;
4199
4200 dma_error:
4201 dev_err(dev, "TX DMA map failed\n");
4202
4203 /* clear timestamp and dma mappings for failed buffer_info mapping */
4204 buffer_info->dma = 0;
4205 buffer_info->time_stamp = 0;
4206 buffer_info->length = 0;
4207
4208 /* clear timestamp and dma mappings for remaining portion of packet */
4209 while (count--) {
4210 if (i == 0)
4211 i = tx_ring->count;
4212 i--;
4213 buffer_info = &tx_ring->tx_buffer_info[i];
4214 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4215 }
4216
4217 return 0;
4218 }
4219
4220 static inline void igb_tx_queue(struct igb_ring *tx_ring,
4221 u32 tx_flags, int count, u32 paylen,
4222 u8 hdr_len)
4223 {
4224 union e1000_adv_tx_desc *tx_desc;
4225 struct igb_tx_buffer *buffer_info;
4226 __le32 olinfo_status, cmd_type;
4227 unsigned int i = tx_ring->next_to_use;
4228
4229 cmd_type = igb_tx_cmd_type(tx_flags);
4230 olinfo_status = igb_tx_olinfo_status(tx_flags,
4231 paylen - hdr_len,
4232 tx_ring);
4233
4234 do {
4235 buffer_info = &tx_ring->tx_buffer_info[i];
4236 tx_desc = IGB_TX_DESC(tx_ring, i);
4237 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4238 tx_desc->read.cmd_type_len = cmd_type |
4239 cpu_to_le32(buffer_info->length);
4240 tx_desc->read.olinfo_status = olinfo_status;
4241 count--;
4242 i++;
4243 if (i == tx_ring->count)
4244 i = 0;
4245 } while (count > 0);
4246
4247 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD);
4248 /* Force memory writes to complete before letting h/w
4249 * know there are new descriptors to fetch. (Only
4250 * applicable for weak-ordered memory model archs,
4251 * such as IA-64). */
4252 wmb();
4253
4254 tx_ring->next_to_use = i;
4255 writel(i, tx_ring->tail);
4256 /* we need this if more than one processor can write to our tail
4257 * at a time, it syncronizes IO on IA64/Altix systems */
4258 mmiowb();
4259 }
4260
4261 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4262 {
4263 struct net_device *netdev = tx_ring->netdev;
4264
4265 netif_stop_subqueue(netdev, tx_ring->queue_index);
4266
4267 /* Herbert's original patch had:
4268 * smp_mb__after_netif_stop_queue();
4269 * but since that doesn't exist yet, just open code it. */
4270 smp_mb();
4271
4272 /* We need to check again in a case another CPU has just
4273 * made room available. */
4274 if (igb_desc_unused(tx_ring) < size)
4275 return -EBUSY;
4276
4277 /* A reprieve! */
4278 netif_wake_subqueue(netdev, tx_ring->queue_index);
4279
4280 u64_stats_update_begin(&tx_ring->tx_syncp2);
4281 tx_ring->tx_stats.restart_queue2++;
4282 u64_stats_update_end(&tx_ring->tx_syncp2);
4283
4284 return 0;
4285 }
4286
/**
 * igb_maybe_stop_tx - fast-path check for enough unused descriptors
 * @tx_ring: ring to check
 * @size: number of free descriptors required
 *
 * Returns 0 when at least @size descriptors are unused, otherwise the
 * result of __igb_maybe_stop_tx().
 **/
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
4293
4294 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4295 struct igb_ring *tx_ring)
4296 {
4297 struct igb_tx_buffer *first;
4298 int tso, count;
4299 u32 tx_flags = 0;
4300 __be16 protocol = vlan_get_protocol(skb);
4301 u8 hdr_len = 0;
4302
4303 /* need: 1 descriptor per page,
4304 * + 2 desc gap to keep tail from touching head,
4305 * + 1 desc for skb->data,
4306 * + 1 desc for context descriptor,
4307 * otherwise try next time */
4308 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4309 /* this is a hard error */
4310 return NETDEV_TX_BUSY;
4311 }
4312
4313 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4314 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4315 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4316 }
4317
4318 if (vlan_tx_tag_present(skb)) {
4319 tx_flags |= IGB_TX_FLAGS_VLAN;
4320 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4321 }
4322
4323 /* record the location of the first descriptor for this packet */
4324 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4325
4326 tso = igb_tso(tx_ring, skb, tx_flags, protocol, &hdr_len);
4327 if (tso < 0) {
4328 goto out_drop;
4329 } else if (tso) {
4330 tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM;
4331 if (protocol == htons(ETH_P_IP))
4332 tx_flags |= IGB_TX_FLAGS_IPV4;
4333 } else if (igb_tx_csum(tx_ring, skb, tx_flags, protocol) &&
4334 (skb->ip_summed == CHECKSUM_PARTIAL)) {
4335 tx_flags |= IGB_TX_FLAGS_CSUM;
4336 }
4337
4338 /*
4339 * count reflects descriptors mapped, if 0 or less then mapping error
4340 * has occurred and we need to rewind the descriptor queue
4341 */
4342 count = igb_tx_map(tx_ring, skb, first, tx_flags);
4343 if (!count) {
4344 dev_kfree_skb_any(skb);
4345 first->time_stamp = 0;
4346 tx_ring->next_to_use = first - tx_ring->tx_buffer_info;
4347 return NETDEV_TX_OK;
4348 }
4349
4350 igb_tx_queue(tx_ring, tx_flags, count, skb->len, hdr_len);
4351
4352 /* Make sure there is space in the ring for the next send. */
4353 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4354
4355 return NETDEV_TX_OK;
4356
4357 out_drop:
4358 dev_kfree_skb_any(skb);
4359 return NETDEV_TX_OK;
4360 }
4361
4362 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4363 struct sk_buff *skb)
4364 {
4365 unsigned int r_idx = skb->queue_mapping;
4366
4367 if (r_idx >= adapter->num_tx_queues)
4368 r_idx = r_idx % adapter->num_tx_queues;
4369
4370 return adapter->tx_ring[r_idx];
4371 }
4372
4373 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4374 struct net_device *netdev)
4375 {
4376 struct igb_adapter *adapter = netdev_priv(netdev);
4377
4378 if (test_bit(__IGB_DOWN, &adapter->state)) {
4379 dev_kfree_skb_any(skb);
4380 return NETDEV_TX_OK;
4381 }
4382
4383 if (skb->len <= 0) {
4384 dev_kfree_skb_any(skb);
4385 return NETDEV_TX_OK;
4386 }
4387
4388 /*
4389 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4390 * in order to meet this minimum size requirement.
4391 */
4392 if (skb->len < 17) {
4393 if (skb_padto(skb, 17))
4394 return NETDEV_TX_OK;
4395 skb->len = 17;
4396 }
4397
4398 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4399 }
4400
4401 /**
4402 * igb_tx_timeout - Respond to a Tx Hang
4403 * @netdev: network interface device structure
4404 **/
4405 static void igb_tx_timeout(struct net_device *netdev)
4406 {
4407 struct igb_adapter *adapter = netdev_priv(netdev);
4408 struct e1000_hw *hw = &adapter->hw;
4409
4410 /* Do the reset outside of interrupt context */
4411 adapter->tx_timeout_count++;
4412
4413 if (hw->mac.type == e1000_82580)
4414 hw->dev_spec._82575.global_device_reset = true;
4415
4416 schedule_work(&adapter->reset_task);
4417 wr32(E1000_EICS,
4418 (adapter->eims_enable_mask & ~adapter->eims_other));
4419 }
4420
4421 static void igb_reset_task(struct work_struct *work)
4422 {
4423 struct igb_adapter *adapter;
4424 adapter = container_of(work, struct igb_adapter, reset_task);
4425
4426 igb_dump(adapter);
4427 netdev_err(adapter->netdev, "Reset adapter\n");
4428 igb_reinit_locked(adapter);
4429 }
4430
4431 /**
4432 * igb_get_stats64 - Get System Network Statistics
4433 * @netdev: network interface device structure
4434 * @stats: rtnl_link_stats64 pointer
4435 *
4436 **/
4437 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4438 struct rtnl_link_stats64 *stats)
4439 {
4440 struct igb_adapter *adapter = netdev_priv(netdev);
4441
4442 spin_lock(&adapter->stats64_lock);
4443 igb_update_stats(adapter, &adapter->stats64);
4444 memcpy(stats, &adapter->stats64, sizeof(*stats));
4445 spin_unlock(&adapter->stats64_lock);
4446
4447 return stats;
4448 }
4449
4450 /**
4451 * igb_change_mtu - Change the Maximum Transfer Unit
4452 * @netdev: network interface device structure
4453 * @new_mtu: new value for maximum frame size
4454 *
4455 * Returns 0 on success, negative on failure
4456 **/
4457 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4458 {
4459 struct igb_adapter *adapter = netdev_priv(netdev);
4460 struct pci_dev *pdev = adapter->pdev;
4461 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4462
4463 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4464 dev_err(&pdev->dev, "Invalid MTU setting\n");
4465 return -EINVAL;
4466 }
4467
4468 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4469 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4470 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4471 return -EINVAL;
4472 }
4473
4474 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4475 msleep(1);
4476
4477 /* igb_down has a dependency on max_frame_size */
4478 adapter->max_frame_size = max_frame;
4479
4480 if (netif_running(netdev))
4481 igb_down(adapter);
4482
4483 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4484 netdev->mtu, new_mtu);
4485 netdev->mtu = new_mtu;
4486
4487 if (netif_running(netdev))
4488 igb_up(adapter);
4489 else
4490 igb_reset(adapter);
4491
4492 clear_bit(__IGB_RESETTING, &adapter->state);
4493
4494 return 0;
4495 }
4496
4497 /**
4498 * igb_update_stats - Update the board statistics counters
4499 * @adapter: board private structure
4500 **/
4501
4502 void igb_update_stats(struct igb_adapter *adapter,
4503 struct rtnl_link_stats64 *net_stats)
4504 {
4505 struct e1000_hw *hw = &adapter->hw;
4506 struct pci_dev *pdev = adapter->pdev;
4507 u32 reg, mpc;
4508 u16 phy_tmp;
4509 int i;
4510 u64 bytes, packets;
4511 unsigned int start;
4512 u64 _bytes, _packets;
4513
4514 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4515
4516 /*
4517 * Prevent stats update while adapter is being reset, or if the pci
4518 * connection is down.
4519 */
4520 if (adapter->link_speed == 0)
4521 return;
4522 if (pci_channel_offline(pdev))
4523 return;
4524
4525 bytes = 0;
4526 packets = 0;
4527 for (i = 0; i < adapter->num_rx_queues; i++) {
4528 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4529 struct igb_ring *ring = adapter->rx_ring[i];
4530
4531 ring->rx_stats.drops += rqdpc_tmp;
4532 net_stats->rx_fifo_errors += rqdpc_tmp;
4533
4534 do {
4535 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4536 _bytes = ring->rx_stats.bytes;
4537 _packets = ring->rx_stats.packets;
4538 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4539 bytes += _bytes;
4540 packets += _packets;
4541 }
4542
4543 net_stats->rx_bytes = bytes;
4544 net_stats->rx_packets = packets;
4545
4546 bytes = 0;
4547 packets = 0;
4548 for (i = 0; i < adapter->num_tx_queues; i++) {
4549 struct igb_ring *ring = adapter->tx_ring[i];
4550 do {
4551 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4552 _bytes = ring->tx_stats.bytes;
4553 _packets = ring->tx_stats.packets;
4554 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4555 bytes += _bytes;
4556 packets += _packets;
4557 }
4558 net_stats->tx_bytes = bytes;
4559 net_stats->tx_packets = packets;
4560
4561 /* read stats registers */
4562 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4563 adapter->stats.gprc += rd32(E1000_GPRC);
4564 adapter->stats.gorc += rd32(E1000_GORCL);
4565 rd32(E1000_GORCH); /* clear GORCL */
4566 adapter->stats.bprc += rd32(E1000_BPRC);
4567 adapter->stats.mprc += rd32(E1000_MPRC);
4568 adapter->stats.roc += rd32(E1000_ROC);
4569
4570 adapter->stats.prc64 += rd32(E1000_PRC64);
4571 adapter->stats.prc127 += rd32(E1000_PRC127);
4572 adapter->stats.prc255 += rd32(E1000_PRC255);
4573 adapter->stats.prc511 += rd32(E1000_PRC511);
4574 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4575 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4576 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4577 adapter->stats.sec += rd32(E1000_SEC);
4578
4579 mpc = rd32(E1000_MPC);
4580 adapter->stats.mpc += mpc;
4581 net_stats->rx_fifo_errors += mpc;
4582 adapter->stats.scc += rd32(E1000_SCC);
4583 adapter->stats.ecol += rd32(E1000_ECOL);
4584 adapter->stats.mcc += rd32(E1000_MCC);
4585 adapter->stats.latecol += rd32(E1000_LATECOL);
4586 adapter->stats.dc += rd32(E1000_DC);
4587 adapter->stats.rlec += rd32(E1000_RLEC);
4588 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4589 adapter->stats.xontxc += rd32(E1000_XONTXC);
4590 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4591 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4592 adapter->stats.fcruc += rd32(E1000_FCRUC);
4593 adapter->stats.gptc += rd32(E1000_GPTC);
4594 adapter->stats.gotc += rd32(E1000_GOTCL);
4595 rd32(E1000_GOTCH); /* clear GOTCL */
4596 adapter->stats.rnbc += rd32(E1000_RNBC);
4597 adapter->stats.ruc += rd32(E1000_RUC);
4598 adapter->stats.rfc += rd32(E1000_RFC);
4599 adapter->stats.rjc += rd32(E1000_RJC);
4600 adapter->stats.tor += rd32(E1000_TORH);
4601 adapter->stats.tot += rd32(E1000_TOTH);
4602 adapter->stats.tpr += rd32(E1000_TPR);
4603
4604 adapter->stats.ptc64 += rd32(E1000_PTC64);
4605 adapter->stats.ptc127 += rd32(E1000_PTC127);
4606 adapter->stats.ptc255 += rd32(E1000_PTC255);
4607 adapter->stats.ptc511 += rd32(E1000_PTC511);
4608 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4609 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4610
4611 adapter->stats.mptc += rd32(E1000_MPTC);
4612 adapter->stats.bptc += rd32(E1000_BPTC);
4613
4614 adapter->stats.tpt += rd32(E1000_TPT);
4615 adapter->stats.colc += rd32(E1000_COLC);
4616
4617 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4618 /* read internal phy specific stats */
4619 reg = rd32(E1000_CTRL_EXT);
4620 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4621 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4622 adapter->stats.tncrs += rd32(E1000_TNCRS);
4623 }
4624
4625 adapter->stats.tsctc += rd32(E1000_TSCTC);
4626 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4627
4628 adapter->stats.iac += rd32(E1000_IAC);
4629 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4630 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4631 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4632 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4633 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4634 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4635 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4636 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4637
4638 /* Fill out the OS statistics structure */
4639 net_stats->multicast = adapter->stats.mprc;
4640 net_stats->collisions = adapter->stats.colc;
4641
4642 /* Rx Errors */
4643
4644 /* RLEC on some newer hardware can be incorrect so build
4645 * our own version based on RUC and ROC */
4646 net_stats->rx_errors = adapter->stats.rxerrc +
4647 adapter->stats.crcerrs + adapter->stats.algnerrc +
4648 adapter->stats.ruc + adapter->stats.roc +
4649 adapter->stats.cexterr;
4650 net_stats->rx_length_errors = adapter->stats.ruc +
4651 adapter->stats.roc;
4652 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4653 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4654 net_stats->rx_missed_errors = adapter->stats.mpc;
4655
4656 /* Tx Errors */
4657 net_stats->tx_errors = adapter->stats.ecol +
4658 adapter->stats.latecol;
4659 net_stats->tx_aborted_errors = adapter->stats.ecol;
4660 net_stats->tx_window_errors = adapter->stats.latecol;
4661 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4662
4663 /* Tx Dropped needs to be maintained elsewhere */
4664
4665 /* Phy Stats */
4666 if (hw->phy.media_type == e1000_media_type_copper) {
4667 if ((adapter->link_speed == SPEED_1000) &&
4668 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4669 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4670 adapter->phy_stats.idle_errors += phy_tmp;
4671 }
4672 }
4673
4674 /* Management Stats */
4675 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4676 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4677 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4678
4679 /* OS2BMC Stats */
4680 reg = rd32(E1000_MANC);
4681 if (reg & E1000_MANC_EN_BMC2OS) {
4682 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4683 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4684 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4685 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4686 }
4687 }
4688
4689 static irqreturn_t igb_msix_other(int irq, void *data)
4690 {
4691 struct igb_adapter *adapter = data;
4692 struct e1000_hw *hw = &adapter->hw;
4693 u32 icr = rd32(E1000_ICR);
4694 /* reading ICR causes bit 31 of EICR to be cleared */
4695
4696 if (icr & E1000_ICR_DRSTA)
4697 schedule_work(&adapter->reset_task);
4698
4699 if (icr & E1000_ICR_DOUTSYNC) {
4700 /* HW is reporting DMA is out of sync */
4701 adapter->stats.doosync++;
4702 /* The DMA Out of Sync is also indication of a spoof event
4703 * in IOV mode. Check the Wrong VM Behavior register to
4704 * see if it is really a spoof event. */
4705 igb_check_wvbr(adapter);
4706 }
4707
4708 /* Check for a mailbox event */
4709 if (icr & E1000_ICR_VMMB)
4710 igb_msg_task(adapter);
4711
4712 if (icr & E1000_ICR_LSC) {
4713 hw->mac.get_link_status = 1;
4714 /* guard against interrupt when we're going down */
4715 if (!test_bit(__IGB_DOWN, &adapter->state))
4716 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4717 }
4718
4719 if (adapter->vfs_allocated_count)
4720 wr32(E1000_IMS, E1000_IMS_LSC |
4721 E1000_IMS_VMMB |
4722 E1000_IMS_DOUTSYNC);
4723 else
4724 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4725 wr32(E1000_EIMS, adapter->eims_other);
4726
4727 return IRQ_HANDLED;
4728 }
4729
4730 static void igb_write_itr(struct igb_q_vector *q_vector)
4731 {
4732 struct igb_adapter *adapter = q_vector->adapter;
4733 u32 itr_val = q_vector->itr_val & 0x7FFC;
4734
4735 if (!q_vector->set_itr)
4736 return;
4737
4738 if (!itr_val)
4739 itr_val = 0x4;
4740
4741 if (adapter->hw.mac.type == e1000_82575)
4742 itr_val |= itr_val << 16;
4743 else
4744 itr_val |= 0x8000000;
4745
4746 writel(itr_val, q_vector->itr_register);
4747 q_vector->set_itr = 0;
4748 }
4749
4750 static irqreturn_t igb_msix_ring(int irq, void *data)
4751 {
4752 struct igb_q_vector *q_vector = data;
4753
4754 /* Write the ITR value calculated from the previous interrupt. */
4755 igb_write_itr(q_vector);
4756
4757 napi_schedule(&q_vector->napi);
4758
4759 return IRQ_HANDLED;
4760 }
4761
4762 #ifdef CONFIG_IGB_DCA
4763 static void igb_update_dca(struct igb_q_vector *q_vector)
4764 {
4765 struct igb_adapter *adapter = q_vector->adapter;
4766 struct e1000_hw *hw = &adapter->hw;
4767 int cpu = get_cpu();
4768
4769 if (q_vector->cpu == cpu)
4770 goto out_no_update;
4771
4772 if (q_vector->tx_ring) {
4773 int q = q_vector->tx_ring->reg_idx;
4774 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4775 if (hw->mac.type == e1000_82575) {
4776 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4777 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4778 } else {
4779 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4780 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4781 E1000_DCA_TXCTRL_CPUID_SHIFT;
4782 }
4783 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4784 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4785 }
4786 if (q_vector->rx_ring) {
4787 int q = q_vector->rx_ring->reg_idx;
4788 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4789 if (hw->mac.type == e1000_82575) {
4790 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4791 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4792 } else {
4793 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4794 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4795 E1000_DCA_RXCTRL_CPUID_SHIFT;
4796 }
4797 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4798 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4799 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4800 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4801 }
4802 q_vector->cpu = cpu;
4803 out_no_update:
4804 put_cpu();
4805 }
4806
4807 static void igb_setup_dca(struct igb_adapter *adapter)
4808 {
4809 struct e1000_hw *hw = &adapter->hw;
4810 int i;
4811
4812 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4813 return;
4814
4815 /* Always use CB2 mode, difference is masked in the CB driver. */
4816 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4817
4818 for (i = 0; i < adapter->num_q_vectors; i++) {
4819 adapter->q_vector[i]->cpu = -1;
4820 igb_update_dca(adapter->q_vector[i]);
4821 }
4822 }
4823
4824 static int __igb_notify_dca(struct device *dev, void *data)
4825 {
4826 struct net_device *netdev = dev_get_drvdata(dev);
4827 struct igb_adapter *adapter = netdev_priv(netdev);
4828 struct pci_dev *pdev = adapter->pdev;
4829 struct e1000_hw *hw = &adapter->hw;
4830 unsigned long event = *(unsigned long *)data;
4831
4832 switch (event) {
4833 case DCA_PROVIDER_ADD:
4834 /* if already enabled, don't do it again */
4835 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4836 break;
4837 if (dca_add_requester(dev) == 0) {
4838 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4839 dev_info(&pdev->dev, "DCA enabled\n");
4840 igb_setup_dca(adapter);
4841 break;
4842 }
4843 /* Fall Through since DCA is disabled. */
4844 case DCA_PROVIDER_REMOVE:
4845 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4846 /* without this a class_device is left
4847 * hanging around in the sysfs model */
4848 dca_remove_requester(dev);
4849 dev_info(&pdev->dev, "DCA disabled\n");
4850 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4851 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4852 }
4853 break;
4854 }
4855
4856 return 0;
4857 }
4858
4859 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4860 void *p)
4861 {
4862 int ret_val;
4863
4864 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4865 __igb_notify_dca);
4866
4867 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4868 }
4869 #endif /* CONFIG_IGB_DCA */
4870
4871 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4872 {
4873 struct e1000_hw *hw = &adapter->hw;
4874 u32 ping;
4875 int i;
4876
4877 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4878 ping = E1000_PF_CONTROL_MSG;
4879 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4880 ping |= E1000_VT_MSGTYPE_CTS;
4881 igb_write_mbx(hw, &ping, 1, i);
4882 }
4883 }
4884
4885 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4886 {
4887 struct e1000_hw *hw = &adapter->hw;
4888 u32 vmolr = rd32(E1000_VMOLR(vf));
4889 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4890
4891 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4892 IGB_VF_FLAG_MULTI_PROMISC);
4893 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4894
4895 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4896 vmolr |= E1000_VMOLR_MPME;
4897 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4898 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4899 } else {
4900 /*
4901 * if we have hashes and we are clearing a multicast promisc
4902 * flag we need to write the hashes to the MTA as this step
4903 * was previously skipped
4904 */
4905 if (vf_data->num_vf_mc_hashes > 30) {
4906 vmolr |= E1000_VMOLR_MPME;
4907 } else if (vf_data->num_vf_mc_hashes) {
4908 int j;
4909 vmolr |= E1000_VMOLR_ROMPE;
4910 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4911 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4912 }
4913 }
4914
4915 wr32(E1000_VMOLR(vf), vmolr);
4916
4917 /* there are flags left unprocessed, likely not supported */
4918 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4919 return -EINVAL;
4920
4921 return 0;
4922
4923 }
4924
4925 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4926 u32 *msgbuf, u32 vf)
4927 {
4928 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4929 u16 *hash_list = (u16 *)&msgbuf[1];
4930 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4931 int i;
4932
4933 /* salt away the number of multicast addresses assigned
4934 * to this VF for later use to restore when the PF multi cast
4935 * list changes
4936 */
4937 vf_data->num_vf_mc_hashes = n;
4938
4939 /* only up to 30 hash values supported */
4940 if (n > 30)
4941 n = 30;
4942
4943 /* store the hashes for later use */
4944 for (i = 0; i < n; i++)
4945 vf_data->vf_mc_hashes[i] = hash_list[i];
4946
4947 /* Flush and reset the mta with the new values */
4948 igb_set_rx_mode(adapter->netdev);
4949
4950 return 0;
4951 }
4952
4953 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4954 {
4955 struct e1000_hw *hw = &adapter->hw;
4956 struct vf_data_storage *vf_data;
4957 int i, j;
4958
4959 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4960 u32 vmolr = rd32(E1000_VMOLR(i));
4961 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4962
4963 vf_data = &adapter->vf_data[i];
4964
4965 if ((vf_data->num_vf_mc_hashes > 30) ||
4966 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4967 vmolr |= E1000_VMOLR_MPME;
4968 } else if (vf_data->num_vf_mc_hashes) {
4969 vmolr |= E1000_VMOLR_ROMPE;
4970 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4971 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4972 }
4973 wr32(E1000_VMOLR(i), vmolr);
4974 }
4975 }
4976
4977 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4978 {
4979 struct e1000_hw *hw = &adapter->hw;
4980 u32 pool_mask, reg, vid;
4981 int i;
4982
4983 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4984
4985 /* Find the vlan filter for this id */
4986 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4987 reg = rd32(E1000_VLVF(i));
4988
4989 /* remove the vf from the pool */
4990 reg &= ~pool_mask;
4991
4992 /* if pool is empty then remove entry from vfta */
4993 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4994 (reg & E1000_VLVF_VLANID_ENABLE)) {
4995 reg = 0;
4996 vid = reg & E1000_VLVF_VLANID_MASK;
4997 igb_vfta_set(hw, vid, false);
4998 }
4999
5000 wr32(E1000_VLVF(i), reg);
5001 }
5002
5003 adapter->vf_data[vf].vlans_enabled = 0;
5004 }
5005
5006 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5007 {
5008 struct e1000_hw *hw = &adapter->hw;
5009 u32 reg, i;
5010
5011 /* The vlvf table only exists on 82576 hardware and newer */
5012 if (hw->mac.type < e1000_82576)
5013 return -1;
5014
5015 /* we only need to do this if VMDq is enabled */
5016 if (!adapter->vfs_allocated_count)
5017 return -1;
5018
5019 /* Find the vlan filter for this id */
5020 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5021 reg = rd32(E1000_VLVF(i));
5022 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5023 vid == (reg & E1000_VLVF_VLANID_MASK))
5024 break;
5025 }
5026
5027 if (add) {
5028 if (i == E1000_VLVF_ARRAY_SIZE) {
5029 /* Did not find a matching VLAN ID entry that was
5030 * enabled. Search for a free filter entry, i.e.
5031 * one without the enable bit set
5032 */
5033 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5034 reg = rd32(E1000_VLVF(i));
5035 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5036 break;
5037 }
5038 }
5039 if (i < E1000_VLVF_ARRAY_SIZE) {
5040 /* Found an enabled/available entry */
5041 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5042
5043 /* if !enabled we need to set this up in vfta */
5044 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5045 /* add VID to filter table */
5046 igb_vfta_set(hw, vid, true);
5047 reg |= E1000_VLVF_VLANID_ENABLE;
5048 }
5049 reg &= ~E1000_VLVF_VLANID_MASK;
5050 reg |= vid;
5051 wr32(E1000_VLVF(i), reg);
5052
5053 /* do not modify RLPML for PF devices */
5054 if (vf >= adapter->vfs_allocated_count)
5055 return 0;
5056
5057 if (!adapter->vf_data[vf].vlans_enabled) {
5058 u32 size;
5059 reg = rd32(E1000_VMOLR(vf));
5060 size = reg & E1000_VMOLR_RLPML_MASK;
5061 size += 4;
5062 reg &= ~E1000_VMOLR_RLPML_MASK;
5063 reg |= size;
5064 wr32(E1000_VMOLR(vf), reg);
5065 }
5066
5067 adapter->vf_data[vf].vlans_enabled++;
5068 return 0;
5069 }
5070 } else {
5071 if (i < E1000_VLVF_ARRAY_SIZE) {
5072 /* remove vf from the pool */
5073 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5074 /* if pool is empty then remove entry from vfta */
5075 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5076 reg = 0;
5077 igb_vfta_set(hw, vid, false);
5078 }
5079 wr32(E1000_VLVF(i), reg);
5080
5081 /* do not modify RLPML for PF devices */
5082 if (vf >= adapter->vfs_allocated_count)
5083 return 0;
5084
5085 adapter->vf_data[vf].vlans_enabled--;
5086 if (!adapter->vf_data[vf].vlans_enabled) {
5087 u32 size;
5088 reg = rd32(E1000_VMOLR(vf));
5089 size = reg & E1000_VMOLR_RLPML_MASK;
5090 size -= 4;
5091 reg &= ~E1000_VMOLR_RLPML_MASK;
5092 reg |= size;
5093 wr32(E1000_VMOLR(vf), reg);
5094 }
5095 }
5096 }
5097 return 0;
5098 }
5099
5100 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5101 {
5102 struct e1000_hw *hw = &adapter->hw;
5103
5104 if (vid)
5105 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5106 else
5107 wr32(E1000_VMVIR(vf), 0);
5108 }
5109
5110 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5111 int vf, u16 vlan, u8 qos)
5112 {
5113 int err = 0;
5114 struct igb_adapter *adapter = netdev_priv(netdev);
5115
5116 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5117 return -EINVAL;
5118 if (vlan || qos) {
5119 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5120 if (err)
5121 goto out;
5122 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5123 igb_set_vmolr(adapter, vf, !vlan);
5124 adapter->vf_data[vf].pf_vlan = vlan;
5125 adapter->vf_data[vf].pf_qos = qos;
5126 dev_info(&adapter->pdev->dev,
5127 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5128 if (test_bit(__IGB_DOWN, &adapter->state)) {
5129 dev_warn(&adapter->pdev->dev,
5130 "The VF VLAN has been set,"
5131 " but the PF device is not up.\n");
5132 dev_warn(&adapter->pdev->dev,
5133 "Bring the PF device up before"
5134 " attempting to use the VF device.\n");
5135 }
5136 } else {
5137 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5138 false, vf);
5139 igb_set_vmvir(adapter, vlan, vf);
5140 igb_set_vmolr(adapter, vf, true);
5141 adapter->vf_data[vf].pf_vlan = 0;
5142 adapter->vf_data[vf].pf_qos = 0;
5143 }
5144 out:
5145 return err;
5146 }
5147
5148 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5149 {
5150 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5151 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5152
5153 return igb_vlvf_set(adapter, vid, add, vf);
5154 }
5155
5156 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5157 {
5158 /* clear flags - except flag that indicates PF has set the MAC */
5159 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5160 adapter->vf_data[vf].last_nack = jiffies;
5161
5162 /* reset offloads to defaults */
5163 igb_set_vmolr(adapter, vf, true);
5164
5165 /* reset vlans for device */
5166 igb_clear_vf_vfta(adapter, vf);
5167 if (adapter->vf_data[vf].pf_vlan)
5168 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5169 adapter->vf_data[vf].pf_vlan,
5170 adapter->vf_data[vf].pf_qos);
5171 else
5172 igb_clear_vf_vfta(adapter, vf);
5173
5174 /* reset multicast table array for vf */
5175 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5176
5177 /* Flush and reset the mta with the new values */
5178 igb_set_rx_mode(adapter->netdev);
5179 }
5180
5181 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5182 {
5183 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5184
5185 /* generate a new mac address as we were hotplug removed/added */
5186 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5187 random_ether_addr(vf_mac);
5188
5189 /* process remaining reset events */
5190 igb_vf_reset(adapter, vf);
5191 }
5192
5193 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5194 {
5195 struct e1000_hw *hw = &adapter->hw;
5196 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5197 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5198 u32 reg, msgbuf[3];
5199 u8 *addr = (u8 *)(&msgbuf[1]);
5200
5201 /* process all the same items cleared in a function level reset */
5202 igb_vf_reset(adapter, vf);
5203
5204 /* set vf mac address */
5205 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5206
5207 /* enable transmit and receive for vf */
5208 reg = rd32(E1000_VFTE);
5209 wr32(E1000_VFTE, reg | (1 << vf));
5210 reg = rd32(E1000_VFRE);
5211 wr32(E1000_VFRE, reg | (1 << vf));
5212
5213 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5214
5215 /* reply to reset with ack and vf mac address */
5216 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5217 memcpy(addr, vf_mac, 6);
5218 igb_write_mbx(hw, msgbuf, 3, vf);
5219 }
5220
5221 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5222 {
5223 /*
5224 * The VF MAC Address is stored in a packed array of bytes
5225 * starting at the second 32 bit word of the msg array
5226 */
5227 unsigned char *addr = (char *)&msg[1];
5228 int err = -1;
5229
5230 if (is_valid_ether_addr(addr))
5231 err = igb_set_vf_mac(adapter, vf, addr);
5232
5233 return err;
5234 }
5235
5236 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5237 {
5238 struct e1000_hw *hw = &adapter->hw;
5239 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5240 u32 msg = E1000_VT_MSGTYPE_NACK;
5241
5242 /* if device isn't clear to send it shouldn't be reading either */
5243 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5244 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5245 igb_write_mbx(hw, &msg, 1, vf);
5246 vf_data->last_nack = jiffies;
5247 }
5248 }
5249
5250 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5251 {
5252 struct pci_dev *pdev = adapter->pdev;
5253 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5254 struct e1000_hw *hw = &adapter->hw;
5255 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5256 s32 retval;
5257
5258 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5259
5260 if (retval) {
5261 /* if receive failed revoke VF CTS stats and restart init */
5262 dev_err(&pdev->dev, "Error receiving message from VF\n");
5263 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5264 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5265 return;
5266 goto out;
5267 }
5268
5269 /* this is a message we already processed, do nothing */
5270 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5271 return;
5272
5273 /*
5274 * until the vf completes a reset it should not be
5275 * allowed to start any configuration.
5276 */
5277
5278 if (msgbuf[0] == E1000_VF_RESET) {
5279 igb_vf_reset_msg(adapter, vf);
5280 return;
5281 }
5282
5283 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5284 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5285 return;
5286 retval = -1;
5287 goto out;
5288 }
5289
5290 switch ((msgbuf[0] & 0xFFFF)) {
5291 case E1000_VF_SET_MAC_ADDR:
5292 retval = -EINVAL;
5293 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5294 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5295 else
5296 dev_warn(&pdev->dev,
5297 "VF %d attempted to override administratively "
5298 "set MAC address\nReload the VF driver to "
5299 "resume operations\n", vf);
5300 break;
5301 case E1000_VF_SET_PROMISC:
5302 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5303 break;
5304 case E1000_VF_SET_MULTICAST:
5305 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5306 break;
5307 case E1000_VF_SET_LPE:
5308 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5309 break;
5310 case E1000_VF_SET_VLAN:
5311 retval = -1;
5312 if (vf_data->pf_vlan)
5313 dev_warn(&pdev->dev,
5314 "VF %d attempted to override administratively "
5315 "set VLAN tag\nReload the VF driver to "
5316 "resume operations\n", vf);
5317 else
5318 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5319 break;
5320 default:
5321 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5322 retval = -1;
5323 break;
5324 }
5325
5326 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5327 out:
5328 /* notify the VF of the results of what it sent us */
5329 if (retval)
5330 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5331 else
5332 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5333
5334 igb_write_mbx(hw, msgbuf, 1, vf);
5335 }
5336
5337 static void igb_msg_task(struct igb_adapter *adapter)
5338 {
5339 struct e1000_hw *hw = &adapter->hw;
5340 u32 vf;
5341
5342 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5343 /* process any reset requests */
5344 if (!igb_check_for_rst(hw, vf))
5345 igb_vf_reset_event(adapter, vf);
5346
5347 /* process any messages pending */
5348 if (!igb_check_for_msg(hw, vf))
5349 igb_rcv_msg_from_vf(adapter, vf);
5350
5351 /* process any acks */
5352 if (!igb_check_for_ack(hw, vf))
5353 igb_rcv_ack_from_vf(adapter, vf);
5354 }
5355 }
5356
5357 /**
5358 * igb_set_uta - Set unicast filter table address
5359 * @adapter: board private structure
5360 *
5361 * The unicast table address is a register array of 32-bit registers.
5362 * The table is meant to be used in a way similar to how the MTA is used
5363 * however due to certain limitations in the hardware it is necessary to
5364 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5365 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5366 **/
5367 static void igb_set_uta(struct igb_adapter *adapter)
5368 {
5369 struct e1000_hw *hw = &adapter->hw;
5370 int i;
5371
5372 /* The UTA table only exists on 82576 hardware and newer */
5373 if (hw->mac.type < e1000_82576)
5374 return;
5375
5376 /* we only need to do this if VMDq is enabled */
5377 if (!adapter->vfs_allocated_count)
5378 return;
5379
5380 for (i = 0; i < hw->mac.uta_reg_count; i++)
5381 array_wr32(E1000_UTA, i, ~0);
5382 }
5383
5384 /**
5385 * igb_intr_msi - Interrupt Handler
5386 * @irq: interrupt number
5387 * @data: pointer to a network interface device structure
5388 **/
5389 static irqreturn_t igb_intr_msi(int irq, void *data)
5390 {
5391 struct igb_adapter *adapter = data;
5392 struct igb_q_vector *q_vector = adapter->q_vector[0];
5393 struct e1000_hw *hw = &adapter->hw;
5394 /* read ICR disables interrupts using IAM */
5395 u32 icr = rd32(E1000_ICR);
5396
5397 igb_write_itr(q_vector);
5398
5399 if (icr & E1000_ICR_DRSTA)
5400 schedule_work(&adapter->reset_task);
5401
5402 if (icr & E1000_ICR_DOUTSYNC) {
5403 /* HW is reporting DMA is out of sync */
5404 adapter->stats.doosync++;
5405 }
5406
5407 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5408 hw->mac.get_link_status = 1;
5409 if (!test_bit(__IGB_DOWN, &adapter->state))
5410 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5411 }
5412
5413 napi_schedule(&q_vector->napi);
5414
5415 return IRQ_HANDLED;
5416 }
5417
5418 /**
5419 * igb_intr - Legacy Interrupt Handler
5420 * @irq: interrupt number
5421 * @data: pointer to a network interface device structure
5422 **/
5423 static irqreturn_t igb_intr(int irq, void *data)
5424 {
5425 struct igb_adapter *adapter = data;
5426 struct igb_q_vector *q_vector = adapter->q_vector[0];
5427 struct e1000_hw *hw = &adapter->hw;
5428 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5429 * need for the IMC write */
5430 u32 icr = rd32(E1000_ICR);
5431 if (!icr)
5432 return IRQ_NONE; /* Not our interrupt */
5433
5434 igb_write_itr(q_vector);
5435
5436 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5437 * not set, then the adapter didn't send an interrupt */
5438 if (!(icr & E1000_ICR_INT_ASSERTED))
5439 return IRQ_NONE;
5440
5441 if (icr & E1000_ICR_DRSTA)
5442 schedule_work(&adapter->reset_task);
5443
5444 if (icr & E1000_ICR_DOUTSYNC) {
5445 /* HW is reporting DMA is out of sync */
5446 adapter->stats.doosync++;
5447 }
5448
5449 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5450 hw->mac.get_link_status = 1;
5451 /* guard against interrupt when we're going down */
5452 if (!test_bit(__IGB_DOWN, &adapter->state))
5453 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5454 }
5455
5456 napi_schedule(&q_vector->napi);
5457
5458 return IRQ_HANDLED;
5459 }
5460
5461 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5462 {
5463 struct igb_adapter *adapter = q_vector->adapter;
5464 struct e1000_hw *hw = &adapter->hw;
5465
5466 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5467 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5468 if (!adapter->msix_entries)
5469 igb_set_itr(adapter);
5470 else
5471 igb_update_ring_itr(q_vector);
5472 }
5473
5474 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5475 if (adapter->msix_entries)
5476 wr32(E1000_EIMS, q_vector->eims_value);
5477 else
5478 igb_irq_enable(adapter);
5479 }
5480 }
5481
5482 /**
5483 * igb_poll - NAPI Rx polling callback
5484 * @napi: napi polling structure
5485 * @budget: count of how many packets we should handle
5486 **/
5487 static int igb_poll(struct napi_struct *napi, int budget)
5488 {
5489 struct igb_q_vector *q_vector = container_of(napi,
5490 struct igb_q_vector,
5491 napi);
5492 bool clean_complete = true;
5493
5494 #ifdef CONFIG_IGB_DCA
5495 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5496 igb_update_dca(q_vector);
5497 #endif
5498 if (q_vector->tx_ring)
5499 clean_complete = igb_clean_tx_irq(q_vector);
5500
5501 if (q_vector->rx_ring)
5502 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5503
5504 /* If all work not completed, return budget and keep polling */
5505 if (!clean_complete)
5506 return budget;
5507
5508 /* If not enough Rx work done, exit the polling mode */
5509 napi_complete(napi);
5510 igb_ring_irq_enable(q_vector);
5511
5512 return 0;
5513 }
5514
5515 /**
5516 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5517 * @adapter: board private structure
5518 * @shhwtstamps: timestamp structure to update
5519 * @regval: unsigned 64bit system time value.
5520 *
5521 * We need to convert the system time value stored in the RX/TXSTMP registers
5522 * into a hwtstamp which can be used by the upper level timestamping functions
5523 */
5524 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5525 struct skb_shared_hwtstamps *shhwtstamps,
5526 u64 regval)
5527 {
5528 u64 ns;
5529
5530 /*
5531 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5532 * 24 to match clock shift we setup earlier.
5533 */
5534 if (adapter->hw.mac.type == e1000_82580)
5535 regval <<= IGB_82580_TSYNC_SHIFT;
5536
5537 ns = timecounter_cyc2time(&adapter->clock, regval);
5538 timecompare_update(&adapter->compare, ns);
5539 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5540 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5541 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5542 }
5543
5544 /**
5545 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5546 * @q_vector: pointer to q_vector containing needed info
5547 * @buffer: pointer to igb_tx_buffer structure
5548 *
5549 * If we were asked to do hardware stamping and such a time stamp is
5550 * available, then it must have been for this skb here because we only
5551 * allow only one such packet into the queue.
5552 */
5553 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5554 struct igb_tx_buffer *buffer_info)
5555 {
5556 struct igb_adapter *adapter = q_vector->adapter;
5557 struct e1000_hw *hw = &adapter->hw;
5558 struct skb_shared_hwtstamps shhwtstamps;
5559 u64 regval;
5560
5561 /* if skb does not support hw timestamp or TX stamp not valid exit */
5562 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5563 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5564 return;
5565
5566 regval = rd32(E1000_TXSTMPL);
5567 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5568
5569 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5570 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5571 }
5572
5573 /**
5574 * igb_clean_tx_irq - Reclaim resources after transmit completes
5575 * @q_vector: pointer to q_vector containing needed info
5576 * returns true if ring is completely cleaned
5577 **/
5578 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5579 {
5580 struct igb_adapter *adapter = q_vector->adapter;
5581 struct igb_ring *tx_ring = q_vector->tx_ring;
5582 struct igb_tx_buffer *tx_buffer;
5583 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5584 unsigned int total_bytes = 0, total_packets = 0;
5585 unsigned int budget = q_vector->tx_work_limit;
5586 unsigned int i = tx_ring->next_to_clean;
5587
5588 if (test_bit(__IGB_DOWN, &adapter->state))
5589 return true;
5590
5591 tx_buffer = &tx_ring->tx_buffer_info[i];
5592 tx_desc = IGB_TX_DESC(tx_ring, i);
5593 i -= tx_ring->count;
5594
5595 for (; budget; budget--) {
5596 eop_desc = tx_buffer->next_to_watch;
5597
5598 /* prevent any other reads prior to eop_desc */
5599 rmb();
5600
5601 /* if next_to_watch is not set then there is no work pending */
5602 if (!eop_desc)
5603 break;
5604
5605 /* if DD is not set pending work has not been completed */
5606 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5607 break;
5608
5609 /* clear next_to_watch to prevent false hangs */
5610 tx_buffer->next_to_watch = NULL;
5611
5612 do {
5613 tx_desc->wb.status = 0;
5614 if (likely(tx_desc == eop_desc)) {
5615 eop_desc = NULL;
5616
5617 total_bytes += tx_buffer->bytecount;
5618 total_packets += tx_buffer->gso_segs;
5619 igb_tx_hwtstamp(q_vector, tx_buffer);
5620 }
5621
5622 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
5623
5624 tx_buffer++;
5625 tx_desc++;
5626 i++;
5627 if (unlikely(!i)) {
5628 i -= tx_ring->count;
5629 tx_buffer = tx_ring->tx_buffer_info;
5630 tx_desc = IGB_TX_DESC(tx_ring, 0);
5631 }
5632 } while (eop_desc);
5633 }
5634
5635 i += tx_ring->count;
5636 tx_ring->next_to_clean = i;
5637 u64_stats_update_begin(&tx_ring->tx_syncp);
5638 tx_ring->tx_stats.bytes += total_bytes;
5639 tx_ring->tx_stats.packets += total_packets;
5640 u64_stats_update_end(&tx_ring->tx_syncp);
5641 tx_ring->total_bytes += total_bytes;
5642 tx_ring->total_packets += total_packets;
5643
5644 if (tx_ring->detect_tx_hung) {
5645 struct e1000_hw *hw = &adapter->hw;
5646
5647 eop_desc = tx_buffer->next_to_watch;
5648
5649 /* Detect a transmit hang in hardware, this serializes the
5650 * check with the clearing of time_stamp and movement of i */
5651 tx_ring->detect_tx_hung = false;
5652 if (eop_desc &&
5653 time_after(jiffies, tx_buffer->time_stamp +
5654 (adapter->tx_timeout_factor * HZ)) &&
5655 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5656
5657 /* detected Tx unit hang */
5658 dev_err(tx_ring->dev,
5659 "Detected Tx Unit Hang\n"
5660 " Tx Queue <%d>\n"
5661 " TDH <%x>\n"
5662 " TDT <%x>\n"
5663 " next_to_use <%x>\n"
5664 " next_to_clean <%x>\n"
5665 "buffer_info[next_to_clean]\n"
5666 " time_stamp <%lx>\n"
5667 " next_to_watch <%p>\n"
5668 " jiffies <%lx>\n"
5669 " desc.status <%x>\n",
5670 tx_ring->queue_index,
5671 rd32(E1000_TDH(tx_ring->reg_idx)),
5672 readl(tx_ring->tail),
5673 tx_ring->next_to_use,
5674 tx_ring->next_to_clean,
5675 tx_buffer->time_stamp,
5676 eop_desc,
5677 jiffies,
5678 eop_desc->wb.status);
5679 netif_stop_subqueue(tx_ring->netdev,
5680 tx_ring->queue_index);
5681
5682 /* we are about to reset, no point in enabling stuff */
5683 return true;
5684 }
5685 }
5686
5687 if (unlikely(total_packets &&
5688 netif_carrier_ok(tx_ring->netdev) &&
5689 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5690 /* Make sure that anybody stopping the queue after this
5691 * sees the new next_to_clean.
5692 */
5693 smp_mb();
5694 if (__netif_subqueue_stopped(tx_ring->netdev,
5695 tx_ring->queue_index) &&
5696 !(test_bit(__IGB_DOWN, &adapter->state))) {
5697 netif_wake_subqueue(tx_ring->netdev,
5698 tx_ring->queue_index);
5699
5700 u64_stats_update_begin(&tx_ring->tx_syncp);
5701 tx_ring->tx_stats.restart_queue++;
5702 u64_stats_update_end(&tx_ring->tx_syncp);
5703 }
5704 }
5705
5706 return !!budget;
5707 }
5708
5709 static inline void igb_rx_checksum(struct igb_ring *ring,
5710 u32 status_err, struct sk_buff *skb)
5711 {
5712 skb_checksum_none_assert(skb);
5713
5714 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5715 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5716 (status_err & E1000_RXD_STAT_IXSM))
5717 return;
5718
5719 /* TCP/UDP checksum error bit is set */
5720 if (status_err &
5721 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5722 /*
5723 * work around errata with sctp packets where the TCPE aka
5724 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5725 * packets, (aka let the stack check the crc32c)
5726 */
5727 if ((skb->len == 60) &&
5728 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5729 u64_stats_update_begin(&ring->rx_syncp);
5730 ring->rx_stats.csum_err++;
5731 u64_stats_update_end(&ring->rx_syncp);
5732 }
5733 /* let the stack verify checksum errors */
5734 return;
5735 }
5736 /* It must be a TCP or UDP packet with a valid checksum */
5737 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5738 skb->ip_summed = CHECKSUM_UNNECESSARY;
5739
5740 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5741 }
5742
5743 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5744 struct sk_buff *skb)
5745 {
5746 struct igb_adapter *adapter = q_vector->adapter;
5747 struct e1000_hw *hw = &adapter->hw;
5748 u64 regval;
5749
5750 /*
5751 * If this bit is set, then the RX registers contain the time stamp. No
5752 * other packet will be time stamped until we read these registers, so
5753 * read the registers to make them available again. Because only one
5754 * packet can be time stamped at a time, we know that the register
5755 * values must belong to this one here and therefore we don't need to
5756 * compare any of the additional attributes stored for it.
5757 *
5758 * If nothing went wrong, then it should have a shared tx_flags that we
5759 * can turn into a skb_shared_hwtstamps.
5760 */
5761 if (staterr & E1000_RXDADV_STAT_TSIP) {
5762 u32 *stamp = (u32 *)skb->data;
5763 regval = le32_to_cpu(*(stamp + 2));
5764 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5765 skb_pull(skb, IGB_TS_HDR_LEN);
5766 } else {
5767 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5768 return;
5769
5770 regval = rd32(E1000_RXSTMPL);
5771 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5772 }
5773
5774 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5775 }
5776 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5777 {
5778 /* HW will not DMA in data larger than the given buffer, even if it
5779 * parses the (NFS, of course) header to be larger. In that case, it
5780 * fills the header buffer and spills the rest into the page.
5781 */
5782 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5783 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5784 if (hlen > IGB_RX_HDR_LEN)
5785 hlen = IGB_RX_HDR_LEN;
5786 return hlen;
5787 }
5788
5789 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5790 {
5791 struct igb_ring *rx_ring = q_vector->rx_ring;
5792 union e1000_adv_rx_desc *rx_desc;
5793 const int current_node = numa_node_id();
5794 unsigned int total_bytes = 0, total_packets = 0;
5795 u32 staterr;
5796 u16 cleaned_count = igb_desc_unused(rx_ring);
5797 u16 i = rx_ring->next_to_clean;
5798
5799 rx_desc = IGB_RX_DESC(rx_ring, i);
5800 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5801
5802 while (staterr & E1000_RXD_STAT_DD) {
5803 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5804 struct sk_buff *skb = buffer_info->skb;
5805 union e1000_adv_rx_desc *next_rxd;
5806
5807 buffer_info->skb = NULL;
5808 prefetch(skb->data);
5809
5810 i++;
5811 if (i == rx_ring->count)
5812 i = 0;
5813
5814 next_rxd = IGB_RX_DESC(rx_ring, i);
5815 prefetch(next_rxd);
5816
5817 /*
5818 * This memory barrier is needed to keep us from reading
5819 * any other fields out of the rx_desc until we know the
5820 * RXD_STAT_DD bit is set
5821 */
5822 rmb();
5823
5824 if (!skb_is_nonlinear(skb)) {
5825 __skb_put(skb, igb_get_hlen(rx_desc));
5826 dma_unmap_single(rx_ring->dev, buffer_info->dma,
5827 IGB_RX_HDR_LEN,
5828 DMA_FROM_DEVICE);
5829 buffer_info->dma = 0;
5830 }
5831
5832 if (rx_desc->wb.upper.length) {
5833 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5834
5835 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5836 buffer_info->page,
5837 buffer_info->page_offset,
5838 length);
5839
5840 skb->len += length;
5841 skb->data_len += length;
5842 skb->truesize += length;
5843
5844 if ((page_count(buffer_info->page) != 1) ||
5845 (page_to_nid(buffer_info->page) != current_node))
5846 buffer_info->page = NULL;
5847 else
5848 get_page(buffer_info->page);
5849
5850 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5851 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5852 buffer_info->page_dma = 0;
5853 }
5854
5855 if (!(staterr & E1000_RXD_STAT_EOP)) {
5856 struct igb_rx_buffer *next_buffer;
5857 next_buffer = &rx_ring->rx_buffer_info[i];
5858 buffer_info->skb = next_buffer->skb;
5859 buffer_info->dma = next_buffer->dma;
5860 next_buffer->skb = skb;
5861 next_buffer->dma = 0;
5862 goto next_desc;
5863 }
5864
5865 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5866 dev_kfree_skb_any(skb);
5867 goto next_desc;
5868 }
5869
5870 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5871 igb_rx_hwtstamp(q_vector, staterr, skb);
5872 total_bytes += skb->len;
5873 total_packets++;
5874
5875 igb_rx_checksum(rx_ring, staterr, skb);
5876
5877 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5878
5879 if (staterr & E1000_RXD_STAT_VP) {
5880 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5881
5882 __vlan_hwaccel_put_tag(skb, vid);
5883 }
5884 napi_gro_receive(&q_vector->napi, skb);
5885
5886 budget--;
5887 next_desc:
5888 if (!budget)
5889 break;
5890
5891 cleaned_count++;
5892 /* return some buffers to hardware, one at a time is too slow */
5893 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5894 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5895 cleaned_count = 0;
5896 }
5897
5898 /* use prefetched values */
5899 rx_desc = next_rxd;
5900 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5901 }
5902
5903 rx_ring->next_to_clean = i;
5904 u64_stats_update_begin(&rx_ring->rx_syncp);
5905 rx_ring->rx_stats.packets += total_packets;
5906 rx_ring->rx_stats.bytes += total_bytes;
5907 u64_stats_update_end(&rx_ring->rx_syncp);
5908 rx_ring->total_packets += total_packets;
5909 rx_ring->total_bytes += total_bytes;
5910
5911 if (cleaned_count)
5912 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5913
5914 return !!budget;
5915 }
5916
5917 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
5918 struct igb_rx_buffer *bi)
5919 {
5920 struct sk_buff *skb = bi->skb;
5921 dma_addr_t dma = bi->dma;
5922
5923 if (dma)
5924 return true;
5925
5926 if (likely(!skb)) {
5927 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
5928 IGB_RX_HDR_LEN);
5929 bi->skb = skb;
5930 if (!skb) {
5931 rx_ring->rx_stats.alloc_failed++;
5932 return false;
5933 }
5934
5935 /* initialize skb for ring */
5936 skb_record_rx_queue(skb, rx_ring->queue_index);
5937 }
5938
5939 dma = dma_map_single(rx_ring->dev, skb->data,
5940 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
5941
5942 if (dma_mapping_error(rx_ring->dev, dma)) {
5943 rx_ring->rx_stats.alloc_failed++;
5944 return false;
5945 }
5946
5947 bi->dma = dma;
5948 return true;
5949 }
5950
5951 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
5952 struct igb_rx_buffer *bi)
5953 {
5954 struct page *page = bi->page;
5955 dma_addr_t page_dma = bi->page_dma;
5956 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
5957
5958 if (page_dma)
5959 return true;
5960
5961 if (!page) {
5962 page = netdev_alloc_page(rx_ring->netdev);
5963 bi->page = page;
5964 if (unlikely(!page)) {
5965 rx_ring->rx_stats.alloc_failed++;
5966 return false;
5967 }
5968 }
5969
5970 page_dma = dma_map_page(rx_ring->dev, page,
5971 page_offset, PAGE_SIZE / 2,
5972 DMA_FROM_DEVICE);
5973
5974 if (dma_mapping_error(rx_ring->dev, page_dma)) {
5975 rx_ring->rx_stats.alloc_failed++;
5976 return false;
5977 }
5978
5979 bi->page_dma = page_dma;
5980 bi->page_offset = page_offset;
5981 return true;
5982 }
5983
5984 /**
5985 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
5986 * @adapter: address of board private structure
5987 **/
5988 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
5989 {
5990 union e1000_adv_rx_desc *rx_desc;
5991 struct igb_rx_buffer *bi;
5992 u16 i = rx_ring->next_to_use;
5993
5994 rx_desc = IGB_RX_DESC(rx_ring, i);
5995 bi = &rx_ring->rx_buffer_info[i];
5996 i -= rx_ring->count;
5997
5998 while (cleaned_count--) {
5999 if (!igb_alloc_mapped_skb(rx_ring, bi))
6000 break;
6001
6002 /* Refresh the desc even if buffer_addrs didn't change
6003 * because each write-back erases this info. */
6004 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6005
6006 if (!igb_alloc_mapped_page(rx_ring, bi))
6007 break;
6008
6009 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6010
6011 rx_desc++;
6012 bi++;
6013 i++;
6014 if (unlikely(!i)) {
6015 rx_desc = IGB_RX_DESC(rx_ring, 0);
6016 bi = rx_ring->rx_buffer_info;
6017 i -= rx_ring->count;
6018 }
6019
6020 /* clear the hdr_addr for the next_to_use descriptor */
6021 rx_desc->read.hdr_addr = 0;
6022 }
6023
6024 i += rx_ring->count;
6025
6026 if (rx_ring->next_to_use != i) {
6027 rx_ring->next_to_use = i;
6028
6029 /* Force memory writes to complete before letting h/w
6030 * know there are new descriptors to fetch. (Only
6031 * applicable for weak-ordered memory model archs,
6032 * such as IA-64). */
6033 wmb();
6034 writel(i, rx_ring->tail);
6035 }
6036 }
6037
6038 /**
6039 * igb_mii_ioctl -
6040 * @netdev:
6041 * @ifreq:
6042 * @cmd:
6043 **/
6044 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6045 {
6046 struct igb_adapter *adapter = netdev_priv(netdev);
6047 struct mii_ioctl_data *data = if_mii(ifr);
6048
6049 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6050 return -EOPNOTSUPP;
6051
6052 switch (cmd) {
6053 case SIOCGMIIPHY:
6054 data->phy_id = adapter->hw.phy.addr;
6055 break;
6056 case SIOCGMIIREG:
6057 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6058 &data->val_out))
6059 return -EIO;
6060 break;
6061 case SIOCSMIIREG:
6062 default:
6063 return -EOPNOTSUPP;
6064 }
6065 return 0;
6066 }
6067
6068 /**
6069 * igb_hwtstamp_ioctl - control hardware time stamping
6070 * @netdev:
6071 * @ifreq:
6072 * @cmd:
6073 *
6074 * Outgoing time stamping can be enabled and disabled. Play nice and
6075 * disable it when requested, although it shouldn't cause any overhead
6076 * when no packet needs it. At most one packet in the queue may be
6077 * marked for time stamping, otherwise it would be impossible to tell
6078 * for sure to which packet the hardware time stamp belongs.
6079 *
6080 * Incoming time stamping has to be configured via the hardware
6081 * filters. Not all combinations are supported, in particular event
6082 * type has to be specified. Matching the kind of event packet is
6083 * not supported, with the exception of "all V2 events regardless of
6084 * level 2 or 4".
6085 *
6086 **/
6087 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6088 struct ifreq *ifr, int cmd)
6089 {
6090 struct igb_adapter *adapter = netdev_priv(netdev);
6091 struct e1000_hw *hw = &adapter->hw;
6092 struct hwtstamp_config config;
6093 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6094 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6095 u32 tsync_rx_cfg = 0;
6096 bool is_l4 = false;
6097 bool is_l2 = false;
6098 u32 regval;
6099
6100 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6101 return -EFAULT;
6102
6103 /* reserved for future extensions */
6104 if (config.flags)
6105 return -EINVAL;
6106
6107 switch (config.tx_type) {
6108 case HWTSTAMP_TX_OFF:
6109 tsync_tx_ctl = 0;
6110 case HWTSTAMP_TX_ON:
6111 break;
6112 default:
6113 return -ERANGE;
6114 }
6115
6116 switch (config.rx_filter) {
6117 case HWTSTAMP_FILTER_NONE:
6118 tsync_rx_ctl = 0;
6119 break;
6120 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6121 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6122 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6123 case HWTSTAMP_FILTER_ALL:
6124 /*
6125 * register TSYNCRXCFG must be set, therefore it is not
6126 * possible to time stamp both Sync and Delay_Req messages
6127 * => fall back to time stamping all packets
6128 */
6129 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6130 config.rx_filter = HWTSTAMP_FILTER_ALL;
6131 break;
6132 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6133 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6134 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6135 is_l4 = true;
6136 break;
6137 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6138 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6139 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6140 is_l4 = true;
6141 break;
6142 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6143 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6144 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6145 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6146 is_l2 = true;
6147 is_l4 = true;
6148 config.rx_filter = HWTSTAMP_FILTER_SOME;
6149 break;
6150 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6151 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6152 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6153 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6154 is_l2 = true;
6155 is_l4 = true;
6156 config.rx_filter = HWTSTAMP_FILTER_SOME;
6157 break;
6158 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6159 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6160 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6161 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6162 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6163 is_l2 = true;
6164 break;
6165 default:
6166 return -ERANGE;
6167 }
6168
6169 if (hw->mac.type == e1000_82575) {
6170 if (tsync_rx_ctl | tsync_tx_ctl)
6171 return -EINVAL;
6172 return 0;
6173 }
6174
6175 /*
6176 * Per-packet timestamping only works if all packets are
6177 * timestamped, so enable timestamping in all packets as
6178 * long as one rx filter was configured.
6179 */
6180 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6181 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6182 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6183 }
6184
6185 /* enable/disable TX */
6186 regval = rd32(E1000_TSYNCTXCTL);
6187 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6188 regval |= tsync_tx_ctl;
6189 wr32(E1000_TSYNCTXCTL, regval);
6190
6191 /* enable/disable RX */
6192 regval = rd32(E1000_TSYNCRXCTL);
6193 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6194 regval |= tsync_rx_ctl;
6195 wr32(E1000_TSYNCRXCTL, regval);
6196
6197 /* define which PTP packets are time stamped */
6198 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6199
6200 /* define ethertype filter for timestamped packets */
6201 if (is_l2)
6202 wr32(E1000_ETQF(3),
6203 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6204 E1000_ETQF_1588 | /* enable timestamping */
6205 ETH_P_1588)); /* 1588 eth protocol type */
6206 else
6207 wr32(E1000_ETQF(3), 0);
6208
6209 #define PTP_PORT 319
6210 /* L4 Queue Filter[3]: filter by destination port and protocol */
6211 if (is_l4) {
6212 u32 ftqf = (IPPROTO_UDP /* UDP */
6213 | E1000_FTQF_VF_BP /* VF not compared */
6214 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6215 | E1000_FTQF_MASK); /* mask all inputs */
6216 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6217
6218 wr32(E1000_IMIR(3), htons(PTP_PORT));
6219 wr32(E1000_IMIREXT(3),
6220 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6221 if (hw->mac.type == e1000_82576) {
6222 /* enable source port check */
6223 wr32(E1000_SPQF(3), htons(PTP_PORT));
6224 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6225 }
6226 wr32(E1000_FTQF(3), ftqf);
6227 } else {
6228 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6229 }
6230 wrfl();
6231
6232 adapter->hwtstamp_config = config;
6233
6234 /* clear TX/RX time stamp registers, just to be sure */
6235 regval = rd32(E1000_TXSTMPH);
6236 regval = rd32(E1000_RXSTMPH);
6237
6238 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6239 -EFAULT : 0;
6240 }
6241
6242 /**
6243 * igb_ioctl -
6244 * @netdev:
6245 * @ifreq:
6246 * @cmd:
6247 **/
6248 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6249 {
6250 switch (cmd) {
6251 case SIOCGMIIPHY:
6252 case SIOCGMIIREG:
6253 case SIOCSMIIREG:
6254 return igb_mii_ioctl(netdev, ifr, cmd);
6255 case SIOCSHWTSTAMP:
6256 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6257 default:
6258 return -EOPNOTSUPP;
6259 }
6260 }
6261
6262 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6263 {
6264 struct igb_adapter *adapter = hw->back;
6265 u16 cap_offset;
6266
6267 cap_offset = adapter->pdev->pcie_cap;
6268 if (!cap_offset)
6269 return -E1000_ERR_CONFIG;
6270
6271 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6272
6273 return 0;
6274 }
6275
6276 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6277 {
6278 struct igb_adapter *adapter = hw->back;
6279 u16 cap_offset;
6280
6281 cap_offset = adapter->pdev->pcie_cap;
6282 if (!cap_offset)
6283 return -E1000_ERR_CONFIG;
6284
6285 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6286
6287 return 0;
6288 }
6289
6290 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6291 {
6292 struct igb_adapter *adapter = netdev_priv(netdev);
6293 struct e1000_hw *hw = &adapter->hw;
6294 u32 ctrl, rctl;
6295
6296 igb_irq_disable(adapter);
6297
6298 if (features & NETIF_F_HW_VLAN_RX) {
6299 /* enable VLAN tag insert/strip */
6300 ctrl = rd32(E1000_CTRL);
6301 ctrl |= E1000_CTRL_VME;
6302 wr32(E1000_CTRL, ctrl);
6303
6304 /* Disable CFI check */
6305 rctl = rd32(E1000_RCTL);
6306 rctl &= ~E1000_RCTL_CFIEN;
6307 wr32(E1000_RCTL, rctl);
6308 } else {
6309 /* disable VLAN tag insert/strip */
6310 ctrl = rd32(E1000_CTRL);
6311 ctrl &= ~E1000_CTRL_VME;
6312 wr32(E1000_CTRL, ctrl);
6313 }
6314
6315 igb_rlpml_set(adapter);
6316
6317 if (!test_bit(__IGB_DOWN, &adapter->state))
6318 igb_irq_enable(adapter);
6319 }
6320
6321 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6322 {
6323 struct igb_adapter *adapter = netdev_priv(netdev);
6324 struct e1000_hw *hw = &adapter->hw;
6325 int pf_id = adapter->vfs_allocated_count;
6326
6327 /* attempt to add filter to vlvf array */
6328 igb_vlvf_set(adapter, vid, true, pf_id);
6329
6330 /* add the filter since PF can receive vlans w/o entry in vlvf */
6331 igb_vfta_set(hw, vid, true);
6332
6333 set_bit(vid, adapter->active_vlans);
6334 }
6335
6336 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6337 {
6338 struct igb_adapter *adapter = netdev_priv(netdev);
6339 struct e1000_hw *hw = &adapter->hw;
6340 int pf_id = adapter->vfs_allocated_count;
6341 s32 err;
6342
6343 igb_irq_disable(adapter);
6344
6345 if (!test_bit(__IGB_DOWN, &adapter->state))
6346 igb_irq_enable(adapter);
6347
6348 /* remove vlan from VLVF table array */
6349 err = igb_vlvf_set(adapter, vid, false, pf_id);
6350
6351 /* if vid was not present in VLVF just remove it from table */
6352 if (err)
6353 igb_vfta_set(hw, vid, false);
6354
6355 clear_bit(vid, adapter->active_vlans);
6356 }
6357
6358 static void igb_restore_vlan(struct igb_adapter *adapter)
6359 {
6360 u16 vid;
6361
6362 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6363 igb_vlan_rx_add_vid(adapter->netdev, vid);
6364 }
6365
6366 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6367 {
6368 struct pci_dev *pdev = adapter->pdev;
6369 struct e1000_mac_info *mac = &adapter->hw.mac;
6370
6371 mac->autoneg = 0;
6372
6373 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6374 * for the switch() below to work */
6375 if ((spd & 1) || (dplx & ~1))
6376 goto err_inval;
6377
6378 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6379 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6380 spd != SPEED_1000 &&
6381 dplx != DUPLEX_FULL)
6382 goto err_inval;
6383
6384 switch (spd + dplx) {
6385 case SPEED_10 + DUPLEX_HALF:
6386 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6387 break;
6388 case SPEED_10 + DUPLEX_FULL:
6389 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6390 break;
6391 case SPEED_100 + DUPLEX_HALF:
6392 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6393 break;
6394 case SPEED_100 + DUPLEX_FULL:
6395 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6396 break;
6397 case SPEED_1000 + DUPLEX_FULL:
6398 mac->autoneg = 1;
6399 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6400 break;
6401 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6402 default:
6403 goto err_inval;
6404 }
6405 return 0;
6406
6407 err_inval:
6408 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6409 return -EINVAL;
6410 }
6411
6412 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6413 {
6414 struct net_device *netdev = pci_get_drvdata(pdev);
6415 struct igb_adapter *adapter = netdev_priv(netdev);
6416 struct e1000_hw *hw = &adapter->hw;
6417 u32 ctrl, rctl, status;
6418 u32 wufc = adapter->wol;
6419 #ifdef CONFIG_PM
6420 int retval = 0;
6421 #endif
6422
6423 netif_device_detach(netdev);
6424
6425 if (netif_running(netdev))
6426 igb_close(netdev);
6427
6428 igb_clear_interrupt_scheme(adapter);
6429
6430 #ifdef CONFIG_PM
6431 retval = pci_save_state(pdev);
6432 if (retval)
6433 return retval;
6434 #endif
6435
6436 status = rd32(E1000_STATUS);
6437 if (status & E1000_STATUS_LU)
6438 wufc &= ~E1000_WUFC_LNKC;
6439
6440 if (wufc) {
6441 igb_setup_rctl(adapter);
6442 igb_set_rx_mode(netdev);
6443
6444 /* turn on all-multi mode if wake on multicast is enabled */
6445 if (wufc & E1000_WUFC_MC) {
6446 rctl = rd32(E1000_RCTL);
6447 rctl |= E1000_RCTL_MPE;
6448 wr32(E1000_RCTL, rctl);
6449 }
6450
6451 ctrl = rd32(E1000_CTRL);
6452 /* advertise wake from D3Cold */
6453 #define E1000_CTRL_ADVD3WUC 0x00100000
6454 /* phy power management enable */
6455 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6456 ctrl |= E1000_CTRL_ADVD3WUC;
6457 wr32(E1000_CTRL, ctrl);
6458
6459 /* Allow time for pending master requests to run */
6460 igb_disable_pcie_master(hw);
6461
6462 wr32(E1000_WUC, E1000_WUC_PME_EN);
6463 wr32(E1000_WUFC, wufc);
6464 } else {
6465 wr32(E1000_WUC, 0);
6466 wr32(E1000_WUFC, 0);
6467 }
6468
6469 *enable_wake = wufc || adapter->en_mng_pt;
6470 if (!*enable_wake)
6471 igb_power_down_link(adapter);
6472 else
6473 igb_power_up_link(adapter);
6474
6475 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6476 * would have already happened in close and is redundant. */
6477 igb_release_hw_control(adapter);
6478
6479 pci_disable_device(pdev);
6480
6481 return 0;
6482 }
6483
6484 #ifdef CONFIG_PM
6485 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6486 {
6487 int retval;
6488 bool wake;
6489
6490 retval = __igb_shutdown(pdev, &wake);
6491 if (retval)
6492 return retval;
6493
6494 if (wake) {
6495 pci_prepare_to_sleep(pdev);
6496 } else {
6497 pci_wake_from_d3(pdev, false);
6498 pci_set_power_state(pdev, PCI_D3hot);
6499 }
6500
6501 return 0;
6502 }
6503
6504 static int igb_resume(struct pci_dev *pdev)
6505 {
6506 struct net_device *netdev = pci_get_drvdata(pdev);
6507 struct igb_adapter *adapter = netdev_priv(netdev);
6508 struct e1000_hw *hw = &adapter->hw;
6509 u32 err;
6510
6511 pci_set_power_state(pdev, PCI_D0);
6512 pci_restore_state(pdev);
6513 pci_save_state(pdev);
6514
6515 err = pci_enable_device_mem(pdev);
6516 if (err) {
6517 dev_err(&pdev->dev,
6518 "igb: Cannot enable PCI device from suspend\n");
6519 return err;
6520 }
6521 pci_set_master(pdev);
6522
6523 pci_enable_wake(pdev, PCI_D3hot, 0);
6524 pci_enable_wake(pdev, PCI_D3cold, 0);
6525
6526 if (igb_init_interrupt_scheme(adapter)) {
6527 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6528 return -ENOMEM;
6529 }
6530
6531 igb_reset(adapter);
6532
6533 /* let the f/w know that the h/w is now under the control of the
6534 * driver. */
6535 igb_get_hw_control(adapter);
6536
6537 wr32(E1000_WUS, ~0);
6538
6539 if (netif_running(netdev)) {
6540 err = igb_open(netdev);
6541 if (err)
6542 return err;
6543 }
6544
6545 netif_device_attach(netdev);
6546
6547 return 0;
6548 }
6549 #endif
6550
6551 static void igb_shutdown(struct pci_dev *pdev)
6552 {
6553 bool wake;
6554
6555 __igb_shutdown(pdev, &wake);
6556
6557 if (system_state == SYSTEM_POWER_OFF) {
6558 pci_wake_from_d3(pdev, wake);
6559 pci_set_power_state(pdev, PCI_D3hot);
6560 }
6561 }
6562
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts.  It's not called while
 * the interrupt routine is executing.
 *
 * With MSI-X every queue vector has its interrupt cause masked through
 * EIMC and its NAPI context scheduled; without MSI-X all interrupts are
 * disabled and only vector 0 is scheduled.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct igb_q_vector *q_vector;
	int idx;

	if (adapter->msix_entries) {
		for (idx = 0; idx < adapter->num_q_vectors; idx++) {
			q_vector = adapter->q_vector[idx];
			wr32(E1000_EIMC, q_vector->eims_value);
			napi_schedule(&q_vector->napi);
		}
		return;
	}

	q_vector = adapter->q_vector[0];
	igb_irq_disable(adapter);
	napi_schedule(&q_vector->napi);
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
6589
6590 /**
6591 * igb_io_error_detected - called when PCI error is detected
6592 * @pdev: Pointer to PCI device
6593 * @state: The current pci connection state
6594 *
6595 * This function is called after a PCI bus error affecting
6596 * this device has been detected.
6597 */
6598 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6599 pci_channel_state_t state)
6600 {
6601 struct net_device *netdev = pci_get_drvdata(pdev);
6602 struct igb_adapter *adapter = netdev_priv(netdev);
6603
6604 netif_device_detach(netdev);
6605
6606 if (state == pci_channel_io_perm_failure)
6607 return PCI_ERS_RESULT_DISCONNECT;
6608
6609 if (netif_running(netdev))
6610 igb_down(adapter);
6611 pci_disable_device(pdev);
6612
6613 /* Request a slot slot reset. */
6614 return PCI_ERS_RESULT_NEED_RESET;
6615 }
6616
6617 /**
6618 * igb_io_slot_reset - called after the pci bus has been reset.
6619 * @pdev: Pointer to PCI device
6620 *
6621 * Restart the card from scratch, as if from a cold-boot. Implementation
6622 * resembles the first-half of the igb_resume routine.
6623 */
6624 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6625 {
6626 struct net_device *netdev = pci_get_drvdata(pdev);
6627 struct igb_adapter *adapter = netdev_priv(netdev);
6628 struct e1000_hw *hw = &adapter->hw;
6629 pci_ers_result_t result;
6630 int err;
6631
6632 if (pci_enable_device_mem(pdev)) {
6633 dev_err(&pdev->dev,
6634 "Cannot re-enable PCI device after reset.\n");
6635 result = PCI_ERS_RESULT_DISCONNECT;
6636 } else {
6637 pci_set_master(pdev);
6638 pci_restore_state(pdev);
6639 pci_save_state(pdev);
6640
6641 pci_enable_wake(pdev, PCI_D3hot, 0);
6642 pci_enable_wake(pdev, PCI_D3cold, 0);
6643
6644 igb_reset(adapter);
6645 wr32(E1000_WUS, ~0);
6646 result = PCI_ERS_RESULT_RECOVERED;
6647 }
6648
6649 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6650 if (err) {
6651 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6652 "failed 0x%0x\n", err);
6653 /* non-fatal, continue */
6654 }
6655
6656 return result;
6657 }
6658
6659 /**
6660 * igb_io_resume - called when traffic can start flowing again.
6661 * @pdev: Pointer to PCI device
6662 *
6663 * This callback is called when the error recovery driver tells us that
6664 * its OK to resume normal operation. Implementation resembles the
6665 * second-half of the igb_resume routine.
6666 */
6667 static void igb_io_resume(struct pci_dev *pdev)
6668 {
6669 struct net_device *netdev = pci_get_drvdata(pdev);
6670 struct igb_adapter *adapter = netdev_priv(netdev);
6671
6672 if (netif_running(netdev)) {
6673 if (igb_up(adapter)) {
6674 dev_err(&pdev->dev, "igb_up failed after reset\n");
6675 return;
6676 }
6677 }
6678
6679 netif_device_attach(netdev);
6680
6681 /* let the f/w know that the h/w is now under the control of the
6682 * driver. */
6683 igb_get_hw_control(adapter);
6684 }
6685
6686 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6687 u8 qsel)
6688 {
6689 u32 rar_low, rar_high;
6690 struct e1000_hw *hw = &adapter->hw;
6691
6692 /* HW expects these in little endian so we reverse the byte order
6693 * from network order (big endian) to little endian
6694 */
6695 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6696 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6697 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6698
6699 /* Indicate to hardware the Address is Valid. */
6700 rar_high |= E1000_RAH_AV;
6701
6702 if (hw->mac.type == e1000_82575)
6703 rar_high |= E1000_RAH_POOL_1 * qsel;
6704 else
6705 rar_high |= E1000_RAH_POOL_1 << qsel;
6706
6707 wr32(E1000_RAL(index), rar_low);
6708 wrfl();
6709 wr32(E1000_RAH(index), rar_high);
6710 wrfl();
6711 }
6712
6713 static int igb_set_vf_mac(struct igb_adapter *adapter,
6714 int vf, unsigned char *mac_addr)
6715 {
6716 struct e1000_hw *hw = &adapter->hw;
6717 /* VF MAC addresses start at end of receive addresses and moves
6718 * torwards the first, as a result a collision should not be possible */
6719 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6720
6721 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6722
6723 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6724
6725 return 0;
6726 }
6727
6728 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6729 {
6730 struct igb_adapter *adapter = netdev_priv(netdev);
6731 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6732 return -EINVAL;
6733 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6734 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6735 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6736 " change effective.");
6737 if (test_bit(__IGB_DOWN, &adapter->state)) {
6738 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6739 " but the PF device is not up.\n");
6740 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6741 " attempting to use the VF device.\n");
6742 }
6743 return igb_set_vf_mac(adapter, vf, mac);
6744 }
6745
6746 static int igb_link_mbps(int internal_link_speed)
6747 {
6748 switch (internal_link_speed) {
6749 case SPEED_100:
6750 return 100;
6751 case SPEED_1000:
6752 return 1000;
6753 default:
6754 return 0;
6755 }
6756 }
6757
6758 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6759 int link_speed)
6760 {
6761 int rf_dec, rf_int;
6762 u32 bcnrc_val;
6763
6764 if (tx_rate != 0) {
6765 /* Calculate the rate factor values to set */
6766 rf_int = link_speed / tx_rate;
6767 rf_dec = (link_speed - (rf_int * tx_rate));
6768 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6769
6770 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6771 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6772 E1000_RTTBCNRC_RF_INT_MASK);
6773 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6774 } else {
6775 bcnrc_val = 0;
6776 }
6777
6778 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6779 wr32(E1000_RTTBCNRC, bcnrc_val);
6780 }
6781
6782 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6783 {
6784 int actual_link_speed, i;
6785 bool reset_rate = false;
6786
6787 /* VF TX rate limit was not set or not supported */
6788 if ((adapter->vf_rate_link_speed == 0) ||
6789 (adapter->hw.mac.type != e1000_82576))
6790 return;
6791
6792 actual_link_speed = igb_link_mbps(adapter->link_speed);
6793 if (actual_link_speed != adapter->vf_rate_link_speed) {
6794 reset_rate = true;
6795 adapter->vf_rate_link_speed = 0;
6796 dev_info(&adapter->pdev->dev,
6797 "Link speed has been changed. VF Transmit "
6798 "rate is disabled\n");
6799 }
6800
6801 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6802 if (reset_rate)
6803 adapter->vf_data[i].tx_rate = 0;
6804
6805 igb_set_vf_rate_limit(&adapter->hw, i,
6806 adapter->vf_data[i].tx_rate,
6807 actual_link_speed);
6808 }
6809 }
6810
6811 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6812 {
6813 struct igb_adapter *adapter = netdev_priv(netdev);
6814 struct e1000_hw *hw = &adapter->hw;
6815 int actual_link_speed;
6816
6817 if (hw->mac.type != e1000_82576)
6818 return -EOPNOTSUPP;
6819
6820 actual_link_speed = igb_link_mbps(adapter->link_speed);
6821 if ((vf >= adapter->vfs_allocated_count) ||
6822 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6823 (tx_rate < 0) || (tx_rate > actual_link_speed))
6824 return -EINVAL;
6825
6826 adapter->vf_rate_link_speed = actual_link_speed;
6827 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6828 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6829
6830 return 0;
6831 }
6832
6833 static int igb_ndo_get_vf_config(struct net_device *netdev,
6834 int vf, struct ifla_vf_info *ivi)
6835 {
6836 struct igb_adapter *adapter = netdev_priv(netdev);
6837 if (vf >= adapter->vfs_allocated_count)
6838 return -EINVAL;
6839 ivi->vf = vf;
6840 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6841 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6842 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6843 ivi->qos = adapter->vf_data[vf].pf_qos;
6844 return 0;
6845 }
6846
6847 static void igb_vmm_control(struct igb_adapter *adapter)
6848 {
6849 struct e1000_hw *hw = &adapter->hw;
6850 u32 reg;
6851
6852 switch (hw->mac.type) {
6853 case e1000_82575:
6854 default:
6855 /* replication is not supported for 82575 */
6856 return;
6857 case e1000_82576:
6858 /* notify HW that the MAC is adding vlan tags */
6859 reg = rd32(E1000_DTXCTL);
6860 reg |= E1000_DTXCTL_VLAN_ADDED;
6861 wr32(E1000_DTXCTL, reg);
6862 case e1000_82580:
6863 /* enable replication vlan tag stripping */
6864 reg = rd32(E1000_RPLOLR);
6865 reg |= E1000_RPLOLR_STRVLAN;
6866 wr32(E1000_RPLOLR, reg);
6867 case e1000_i350:
6868 /* none of the above registers are supported by i350 */
6869 break;
6870 }
6871
6872 if (adapter->vfs_allocated_count) {
6873 igb_vmdq_set_loopback_pf(hw, true);
6874 igb_vmdq_set_replication_pf(hw, true);
6875 igb_vmdq_set_anti_spoofing_pf(hw, true,
6876 adapter->vfs_allocated_count);
6877 } else {
6878 igb_vmdq_set_loopback_pf(hw, false);
6879 igb_vmdq_set_replication_pf(hw, false);
6880 }
6881 }
6882
6883 /* igb_main.c */
This page took 0.251182 seconds and 6 git commands to generate.