cxgb3 - Fix dev->priv usage
[deliverable/linux.git] / drivers / net / cxgb3 / sge.c
4d22de3e 1/*
1d68e93d 2 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
4d22de3e 3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
4d22de3e 9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
4d22de3e 31 */
32#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
39#include "common.h"
40#include "regs.h"
41#include "sge_defs.h"
42#include "t3_cpl.h"
43#include "firmware_exports.h"
44
45#define USE_GTS 0
46
47#define SGE_RX_SM_BUF_SIZE 1536
e0994eb1 48
4d22de3e 49#define SGE_RX_COPY_THRES 256
cf992af5 50#define SGE_RX_PULL_LEN 128
4d22de3e 51
e0994eb1 52/*
53 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
54 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
55 * directly.
e0994eb1 56 */
57#define FL0_PG_CHUNK_SIZE 2048
58
e0994eb1 59#define SGE_RX_DROP_THRES 16
60
61/*
62 * Period of the Tx buffer reclaim timer. This timer does not need to run
63 * frequently as Tx buffers are usually reclaimed by new Tx packets.
64 */
65#define TX_RECLAIM_PERIOD (HZ / 4)
66
67/* WR size in bytes */
68#define WR_LEN (WR_FLITS * 8)
69
70/*
71 * Types of Tx queues in each queue set. Order here matters, do not change.
72 */
73enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
74
75/* Values for sge_txq.flags */
76enum {
77 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
78 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
79};
80
81struct tx_desc {
82 u64 flit[TX_DESC_FLITS];
83};
84
85struct rx_desc {
86 __be32 addr_lo;
87 __be32 len_gen;
88 __be32 gen2;
89 __be32 addr_hi;
90};
91
92struct tx_sw_desc { /* SW state per Tx descriptor */
93 struct sk_buff *skb;
94};
95
cf992af5 96struct rx_sw_desc { /* SW state per Rx descriptor */
97 union {
98 struct sk_buff *skb;
99 struct fl_pg_chunk pg_chunk;
100 };
101 DECLARE_PCI_UNMAP_ADDR(dma_addr);
102};
103
104struct rsp_desc { /* response queue descriptor */
105 struct rss_header rss_hdr;
106 __be32 flags;
107 __be32 len_cq;
108 u8 imm_data[47];
109 u8 intr_gen;
110};
111
112struct unmap_info { /* packet unmapping info, overlays skb->cb */
113 int sflit; /* start flit of first SGL entry in Tx descriptor */
114 u16 fragidx; /* first page fragment in current Tx descriptor */
115 u16 addr_idx; /* buffer index of first SGL entry in descriptor */
116 u32 len; /* mapped length of skb main body */
117};
118
119/*
120 * Holds unmapping information for Tx packets that need deferred unmapping.
121 * This structure lives at skb->head and must be allocated by callers.
122 */
123struct deferred_unmap_info {
124 struct pci_dev *pdev;
125 dma_addr_t addr[MAX_SKB_FRAGS + 1];
126};
127
128/*
129 * Maps a number of flits to the number of Tx descriptors that can hold them.
130 * The formula is
131 *
132 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
133 *
134 * HW allows up to 4 descriptors to be combined into a WR.
135 */
136static u8 flit_desc_map[] = {
137 0,
138#if SGE_NUM_GENBITS == 1
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
143#elif SGE_NUM_GENBITS == 2
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
148#else
149# error "SGE_NUM_GENBITS must be 1 or 2"
150#endif
151};
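/*
 * Worked example of the formula above, assuming WR_FLITS == 15 (the
 * SGE_NUM_GENBITS == 2 case, which matches the table): 15 flits fit in
 * 1 + (15 - 2) / 14 = 1 descriptor, while 16 flits need
 * 1 + (16 - 2) / 14 = 2 descriptors.
 */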
152
153static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
154{
155 return container_of(q, struct sge_qset, fl[qidx]);
156}
157
158static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
159{
160 return container_of(q, struct sge_qset, rspq);
161}
162
163static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
164{
165 return container_of(q, struct sge_qset, txq[qidx]);
166}
167
168/**
169 * refill_rspq - replenish an SGE response queue
170 * @adapter: the adapter
171 * @q: the response queue to replenish
172 * @credits: how many new responses to make available
173 *
174 * Replenishes a response queue by making the supplied number of responses
175 * available to HW.
176 */
177static inline void refill_rspq(struct adapter *adapter,
178 const struct sge_rspq *q, unsigned int credits)
179{
180 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
181 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
182}
183
184/**
185 * need_skb_unmap - does the platform need unmapping of sk_buffs?
186 *
 187 * Returns true if the platform needs sk_buff unmapping. The result is a
 188 * compile-time constant, so the compiler optimizes away unneeded unmap code.
189 */
190static inline int need_skb_unmap(void)
191{
192 /*
 193 * This structure is used to tell if the platform needs buffer
194 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
195 */
196 struct dummy {
197 DECLARE_PCI_UNMAP_ADDR(addr);
198 };
199
200 return sizeof(struct dummy) != 0;
201}
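/*
 * Rationale: on platforms where DECLARE_PCI_UNMAP_ADDR() expands to nothing,
 * struct dummy is empty and this function evaluates to a compile-time 0, so
 * the branches guarded by need_skb_unmap() are dropped by the compiler; where
 * the macro declares a dma_addr_t field it evaluates to 1 and the unmap code
 * is kept.
 */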
202
203/**
204 * unmap_skb - unmap a packet main body and its page fragments
205 * @skb: the packet
206 * @q: the Tx queue containing Tx descriptors for the packet
207 * @cidx: index of Tx descriptor
208 * @pdev: the PCI device
209 *
210 * Unmap the main body of an sk_buff and its page fragments, if any.
211 * Because of the fairly complicated structure of our SGLs and the desire
212 * to conserve space for metadata, we keep the information necessary to
213 * unmap an sk_buff partly in the sk_buff itself (in its cb), and partly
214 * in the Tx descriptors (the physical addresses of the various data
215 * buffers). The send functions initialize the state in skb->cb so we
216 * can unmap the buffers held in the first Tx descriptor here, and we
217 * have enough information at this point to update the state for the next
218 * Tx descriptor.
219 */
220static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
221 unsigned int cidx, struct pci_dev *pdev)
222{
223 const struct sg_ent *sgp;
224 struct unmap_info *ui = (struct unmap_info *)skb->cb;
225 int nfrags, frag_idx, curflit, j = ui->addr_idx;
226
227 sgp = (struct sg_ent *)&q->desc[cidx].flit[ui->sflit];
228
229 if (ui->len) {
230 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]), ui->len,
231 PCI_DMA_TODEVICE);
232 ui->len = 0; /* so we know for next descriptor for this skb */
233 j = 1;
234 }
235
236 frag_idx = ui->fragidx;
237 curflit = ui->sflit + 1 + j;
238 nfrags = skb_shinfo(skb)->nr_frags;
239
240 while (frag_idx < nfrags && curflit < WR_FLITS) {
241 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
242 skb_shinfo(skb)->frags[frag_idx].size,
243 PCI_DMA_TODEVICE);
244 j ^= 1;
245 if (j == 0) {
246 sgp++;
247 curflit++;
248 }
249 curflit++;
250 frag_idx++;
251 }
252
253 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
254 ui->fragidx = frag_idx;
255 ui->addr_idx = j;
256 ui->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
257 }
258}
259
260/**
261 * free_tx_desc - reclaims Tx descriptors and their buffers
262 * @adapter: the adapter
263 * @q: the Tx queue to reclaim descriptors from
264 * @n: the number of descriptors to reclaim
265 *
266 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
267 * Tx buffers. Called with the Tx queue lock held.
268 */
269static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
270 unsigned int n)
271{
272 struct tx_sw_desc *d;
273 struct pci_dev *pdev = adapter->pdev;
274 unsigned int cidx = q->cidx;
275
276 const int need_unmap = need_skb_unmap() &&
277 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
278
279 d = &q->sdesc[cidx];
280 while (n--) {
281 if (d->skb) { /* an SGL is present */
99d7cf30 282 if (need_unmap)
283 unmap_skb(d->skb, q, cidx, pdev);
284 if (d->skb->priority == cidx)
285 kfree_skb(d->skb);
286 }
287 ++d;
288 if (++cidx == q->size) {
289 cidx = 0;
290 d = q->sdesc;
291 }
292 }
293 q->cidx = cidx;
294}
295
296/**
297 * reclaim_completed_tx - reclaims completed Tx descriptors
298 * @adapter: the adapter
299 * @q: the Tx queue to reclaim completed descriptors from
300 *
301 * Reclaims Tx descriptors that the SGE has indicated it has processed,
302 * and frees the associated buffers if possible. Called with the Tx
303 * queue's lock held.
304 */
305static inline void reclaim_completed_tx(struct adapter *adapter,
306 struct sge_txq *q)
307{
308 unsigned int reclaim = q->processed - q->cleaned;
309
310 if (reclaim) {
311 free_tx_desc(adapter, q, reclaim);
312 q->cleaned += reclaim;
313 q->in_use -= reclaim;
314 }
315}
316
317/**
318 * should_restart_tx - are there enough resources to restart a Tx queue?
319 * @q: the Tx queue
320 *
321 * Checks if there are enough descriptors to restart a suspended Tx queue.
322 */
323static inline int should_restart_tx(const struct sge_txq *q)
324{
325 unsigned int r = q->processed - q->cleaned;
326
327 return q->in_use - r < (q->size >> 1);
328}
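/*
 * In other words, restart only if fewer than half of the queue's descriptors
 * would remain in use after reclaiming the descriptors that have already
 * completed.
 */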
329
330/**
331 * free_rx_bufs - free the Rx buffers on an SGE free list
332 * @pdev: the PCI device associated with the adapter
333 * @rxq: the SGE free list to clean up
334 *
335 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
336 * this queue should be stopped before calling this function.
337 */
338static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
339{
340 unsigned int cidx = q->cidx;
341
342 while (q->credits--) {
343 struct rx_sw_desc *d = &q->sdesc[cidx];
344
345 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
346 q->buf_size, PCI_DMA_FROMDEVICE);
347 if (q->use_pages) {
348 put_page(d->pg_chunk.page);
349 d->pg_chunk.page = NULL;
e0994eb1 350 } else {
351 kfree_skb(d->skb);
352 d->skb = NULL;
e0994eb1 353 }
354 if (++cidx == q->size)
355 cidx = 0;
356 }
e0994eb1 357
358 if (q->pg_chunk.page) {
359 __free_page(q->pg_chunk.page);
360 q->pg_chunk.page = NULL;
361 }
362}
363
364/**
365 * add_one_rx_buf - add a packet buffer to a free-buffer list
cf992af5 366 * @va: buffer start VA
367 * @len: the buffer length
368 * @d: the HW Rx descriptor to write
369 * @sd: the SW Rx descriptor to write
370 * @gen: the generation bit value
371 * @pdev: the PCI device associated with the adapter
372 *
373 * Add a buffer of the given length to the supplied HW and SW Rx
374 * descriptors.
375 */
cf992af5 376static inline void add_one_rx_buf(void *va, unsigned int len,
377 struct rx_desc *d, struct rx_sw_desc *sd,
378 unsigned int gen, struct pci_dev *pdev)
379{
380 dma_addr_t mapping;
381
e0994eb1 382 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
383 pci_unmap_addr_set(sd, dma_addr, mapping);
384
385 d->addr_lo = cpu_to_be32(mapping);
386 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
387 wmb();
388 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
389 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
390}
391
392static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
393{
394 if (!q->pg_chunk.page) {
395 q->pg_chunk.page = alloc_page(gfp);
396 if (unlikely(!q->pg_chunk.page))
397 return -ENOMEM;
398 q->pg_chunk.va = page_address(q->pg_chunk.page);
399 q->pg_chunk.offset = 0;
400 }
401 sd->pg_chunk = q->pg_chunk;
402
403 q->pg_chunk.offset += q->buf_size;
404 if (q->pg_chunk.offset == PAGE_SIZE)
405 q->pg_chunk.page = NULL;
406 else {
407 q->pg_chunk.va += q->buf_size;
408 get_page(q->pg_chunk.page);
409 }
410 return 0;
411}
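/*
 * Reference counting sketch for the chunk scheme above: alloc_page() provides
 * the initial reference, get_page() adds one for each chunk handed out while
 * more chunks remain in the page, and the last chunk inherits the free list's
 * reference (q->pg_chunk.page is set to NULL). The page is thus freed only
 * after every chunk has been released with put_page() (see free_rx_bufs()).
 */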
412
413/**
414 * refill_fl - refill an SGE free-buffer list
415 * @adapter: the adapter
416 * @q: the free-list to refill
417 * @n: the number of new buffers to allocate
418 * @gfp: the gfp flags for allocating new buffers
419 *
420 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 421 * allocated with the supplied gfp flags. The caller must ensure that
422 * @n does not exceed the queue's capacity.
423 */
424static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
425{
cf992af5 426 void *buf_start;
427 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
428 struct rx_desc *d = &q->desc[q->pidx];
429
430 while (n--) {
431 if (q->use_pages) {
432 if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
433nomem: q->alloc_failed++;
434 break;
435 }
cf992af5 436 buf_start = sd->pg_chunk.va;
e0994eb1 437 } else {
cf992af5 438 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
e0994eb1 439
440 if (!skb)
441 goto nomem;
e0994eb1 442
443 sd->skb = skb;
444 buf_start = skb->data;
445 }
446
447 add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
448 adap->pdev);
449 d++;
450 sd++;
451 if (++q->pidx == q->size) {
452 q->pidx = 0;
453 q->gen ^= 1;
454 sd = q->sdesc;
455 d = q->desc;
456 }
457 q->credits++;
458 }
459
460 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
461}
462
463static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
464{
465 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
466}
467
468/**
469 * recycle_rx_buf - recycle a receive buffer
470 * @adapter: the adapter
471 * @q: the SGE free list
472 * @idx: index of buffer to recycle
473 *
474 * Recycles the specified buffer on the given free list by adding it at
475 * the next available slot on the list.
476 */
477static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
478 unsigned int idx)
479{
480 struct rx_desc *from = &q->desc[idx];
481 struct rx_desc *to = &q->desc[q->pidx];
482
cf992af5 483 q->sdesc[q->pidx] = q->sdesc[idx];
484 to->addr_lo = from->addr_lo; /* already big endian */
485 to->addr_hi = from->addr_hi; /* likewise */
486 wmb();
487 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
488 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
489 q->credits++;
490
491 if (++q->pidx == q->size) {
492 q->pidx = 0;
493 q->gen ^= 1;
494 }
495 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
496}
497
498/**
499 * alloc_ring - allocate resources for an SGE descriptor ring
500 * @pdev: the PCI device
501 * @nelem: the number of descriptors
502 * @elem_size: the size of each descriptor
503 * @sw_size: the size of the SW state associated with each ring element
504 * @phys: the physical address of the allocated ring
505 * @metadata: address of the array holding the SW state for the ring
506 *
507 * Allocates resources for an SGE descriptor ring, such as Tx queues,
508 * free buffer lists, or response queues. Each SGE ring requires
509 * space for its HW descriptors plus, optionally, space for the SW state
510 * associated with each HW entry (the metadata). The function returns
511 * three values: the virtual address for the HW ring (the return value
512 * of the function), the physical address of the HW ring, and the address
513 * of the SW ring.
514 */
515static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
e0994eb1 516 size_t sw_size, dma_addr_t * phys, void *metadata)
517{
518 size_t len = nelem * elem_size;
519 void *s = NULL;
520 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
521
522 if (!p)
523 return NULL;
524 if (sw_size) {
525 s = kcalloc(nelem, sw_size, GFP_KERNEL);
526
527 if (!s) {
528 dma_free_coherent(&pdev->dev, len, p, *phys);
529 return NULL;
530 }
531 }
532 if (metadata)
533 *(void **)metadata = s;
534 memset(p, 0, len);
535 return p;
536}
537
538/**
539 * free_qset - free the resources of an SGE queue set
540 * @adapter: the adapter owning the queue set
541 * @q: the queue set
542 *
543 * Release the HW and SW resources associated with an SGE queue set, such
544 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
545 * queue set must be quiesced prior to calling this.
546 */
547void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
548{
549 int i;
550 struct pci_dev *pdev = adapter->pdev;
551
552 if (q->tx_reclaim_timer.function)
553 del_timer_sync(&q->tx_reclaim_timer);
554
555 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
556 if (q->fl[i].desc) {
557 spin_lock(&adapter->sge.reg_lock);
558 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
559 spin_unlock(&adapter->sge.reg_lock);
560 free_rx_bufs(pdev, &q->fl[i]);
561 kfree(q->fl[i].sdesc);
562 dma_free_coherent(&pdev->dev,
563 q->fl[i].size *
564 sizeof(struct rx_desc), q->fl[i].desc,
565 q->fl[i].phys_addr);
566 }
567
568 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
569 if (q->txq[i].desc) {
570 spin_lock(&adapter->sge.reg_lock);
571 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
572 spin_unlock(&adapter->sge.reg_lock);
573 if (q->txq[i].sdesc) {
574 free_tx_desc(adapter, &q->txq[i],
575 q->txq[i].in_use);
576 kfree(q->txq[i].sdesc);
577 }
578 dma_free_coherent(&pdev->dev,
579 q->txq[i].size *
580 sizeof(struct tx_desc),
581 q->txq[i].desc, q->txq[i].phys_addr);
582 __skb_queue_purge(&q->txq[i].sendq);
583 }
584
585 if (q->rspq.desc) {
586 spin_lock(&adapter->sge.reg_lock);
587 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
588 spin_unlock(&adapter->sge.reg_lock);
589 dma_free_coherent(&pdev->dev,
590 q->rspq.size * sizeof(struct rsp_desc),
591 q->rspq.desc, q->rspq.phys_addr);
592 }
593
594 if (q->netdev)
595 q->netdev->atalk_ptr = NULL;
596
597 memset(q, 0, sizeof(*q));
598}
599
600/**
601 * init_qset_cntxt - initialize an SGE queue set context info
602 * @qs: the queue set
603 * @id: the queue set id
604 *
605 * Initializes the TIDs and context ids for the queues of a queue set.
606 */
607static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
608{
609 qs->rspq.cntxt_id = id;
610 qs->fl[0].cntxt_id = 2 * id;
611 qs->fl[1].cntxt_id = 2 * id + 1;
612 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
613 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
614 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
615 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
616 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
617}
618
619/**
620 * sgl_len - calculates the size of an SGL of the given capacity
621 * @n: the number of SGL entries
622 *
623 * Calculates the number of flits needed for a scatter/gather list that
624 * can hold the given number of entries.
625 */
626static inline unsigned int sgl_len(unsigned int n)
627{
628 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
629 return (3 * n) / 2 + (n & 1);
630}
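/*
 * Example: each SGL entry is an 8-byte address plus a 4-byte length and
 * entries are packed in pairs (struct sg_ent), so sgl_len(2) == 3 flits and
 * sgl_len(3) == (3 * 3) / 2 + 1 == 5 flits.
 */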
631
632/**
633 * flits_to_desc - returns the num of Tx descriptors for the given flits
634 * @n: the number of flits
635 *
636 * Calculates the number of Tx descriptors needed for the supplied number
637 * of flits.
638 */
639static inline unsigned int flits_to_desc(unsigned int n)
640{
641 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
642 return flit_desc_map[n];
643}
644
645/**
646 * get_packet - return the next ingress packet buffer from a free list
647 * @adap: the adapter that received the packet
648 * @fl: the SGE free list holding the packet
649 * @len: the packet length including any SGE padding
650 * @drop_thres: # of remaining buffers before we start dropping packets
651 *
652 * Get the next packet from a free list and complete setup of the
653 * sk_buff. If the packet is small we make a copy and recycle the
654 * original buffer, otherwise we use the original buffer itself. If a
655 * positive drop threshold is supplied packets are dropped and their
656 * buffers recycled if (a) the number of remaining buffers is under the
657 * threshold and the packet is too big to copy, or (b) the packet should
658 * be copied but there is no memory for the copy.
659 */
660static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
661 unsigned int len, unsigned int drop_thres)
662{
663 struct sk_buff *skb = NULL;
664 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
665
666 prefetch(sd->skb->data);
667 fl->credits--;
668
669 if (len <= SGE_RX_COPY_THRES) {
670 skb = alloc_skb(len, GFP_ATOMIC);
671 if (likely(skb != NULL)) {
672 __skb_put(skb, len);
673 pci_dma_sync_single_for_cpu(adap->pdev,
674 pci_unmap_addr(sd, dma_addr), len,
675 PCI_DMA_FROMDEVICE);
676 memcpy(skb->data, sd->skb->data, len);
677 pci_dma_sync_single_for_device(adap->pdev,
678 pci_unmap_addr(sd, dma_addr), len,
679 PCI_DMA_FROMDEVICE);
680 } else if (!drop_thres)
681 goto use_orig_buf;
682recycle:
683 recycle_rx_buf(adap, fl, fl->cidx);
684 return skb;
685 }
686
687 if (unlikely(fl->credits < drop_thres))
688 goto recycle;
689
690use_orig_buf:
691 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
692 fl->buf_size, PCI_DMA_FROMDEVICE);
693 skb = sd->skb;
694 skb_put(skb, len);
695 __refill_fl(adap, fl);
696 return skb;
697}
698
699/**
700 * get_packet_pg - return the next ingress packet buffer from a free list
701 * @adap: the adapter that received the packet
702 * @fl: the SGE free list holding the packet
703 * @len: the packet length including any SGE padding
704 * @drop_thres: # of remaining buffers before we start dropping packets
705 *
706 * Get the next packet from a free list populated with page chunks.
707 * If the packet is small we make a copy and recycle the original buffer,
708 * otherwise we attach the original buffer as a page fragment to a fresh
709 * sk_buff. If a positive drop threshold is supplied packets are dropped
710 * and their buffers recycled if (a) the number of remaining buffers is
711 * under the threshold and the packet is too big to copy, or (b) there's
712 * no system memory.
713 *
714 * Note: this function is similar to @get_packet but deals with Rx buffers
715 * that are page chunks rather than sk_buffs.
716 */
717static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
718 unsigned int len, unsigned int drop_thres)
719{
720 struct sk_buff *skb = NULL;
721 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
722
723 if (len <= SGE_RX_COPY_THRES) {
724 skb = alloc_skb(len, GFP_ATOMIC);
725 if (likely(skb != NULL)) {
726 __skb_put(skb, len);
727 pci_dma_sync_single_for_cpu(adap->pdev,
728 pci_unmap_addr(sd, dma_addr), len,
729 PCI_DMA_FROMDEVICE);
730 memcpy(skb->data, sd->pg_chunk.va, len);
731 pci_dma_sync_single_for_device(adap->pdev,
732 pci_unmap_addr(sd, dma_addr), len,
733 PCI_DMA_FROMDEVICE);
734 } else if (!drop_thres)
735 return NULL;
736recycle:
737 fl->credits--;
738 recycle_rx_buf(adap, fl, fl->cidx);
739 return skb;
740 }
741
742 if (unlikely(fl->credits <= drop_thres))
743 goto recycle;
744
745 skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
746 if (unlikely(!skb)) {
747 if (!drop_thres)
748 return NULL;
749 goto recycle;
750 }
751
752 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
753 fl->buf_size, PCI_DMA_FROMDEVICE);
754 __skb_put(skb, SGE_RX_PULL_LEN);
755 memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
756 skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
757 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
758 len - SGE_RX_PULL_LEN);
759 skb->len = len;
760 skb->data_len = len - SGE_RX_PULL_LEN;
761 skb->truesize += skb->data_len;
762
763 fl->credits--;
764 /*
765 * We do not refill FLs here, we let the caller do it to overlap a
766 * prefetch.
767 */
768 return skb;
769}
770
771/**
772 * get_imm_packet - return the next ingress packet buffer from a response
773 * @resp: the response descriptor containing the packet data
774 *
775 * Return a packet containing the immediate data of the given response.
776 */
777static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
778{
779 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
780
781 if (skb) {
782 __skb_put(skb, IMMED_PKT_SIZE);
27d7ff46 783 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
784 }
785 return skb;
786}
787
788/**
789 * calc_tx_descs - calculate the number of Tx descriptors for a packet
790 * @skb: the packet
791 *
792 * Returns the number of Tx descriptors needed for the given Ethernet
793 * packet. Ethernet packets require addition of WR and CPL headers.
794 */
795static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
796{
797 unsigned int flits;
798
799 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
800 return 1;
801
802 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
803 if (skb_shinfo(skb)->gso_size)
804 flits++;
805 return flits_to_desc(flits);
806}
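/*
 * Example: a non-TSO packet with 2 page fragments that does not fit as
 * immediate data needs sgl_len(2 + 1) + 2 = 7 flits, which the map above
 * resolves to a single Tx descriptor.
 */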
807
808/**
809 * make_sgl - populate a scatter/gather list for a packet
810 * @skb: the packet
811 * @sgp: the SGL to populate
812 * @start: start address of skb main body data to include in the SGL
813 * @len: length of skb main body data to include in the SGL
814 * @pdev: the PCI device
815 *
816 * Generates a scatter/gather list for the buffers that make up a packet
817 * and returns the SGL size in 8-byte words. The caller must size the SGL
818 * appropriately.
819 */
820static inline unsigned int make_sgl(const struct sk_buff *skb,
821 struct sg_ent *sgp, unsigned char *start,
822 unsigned int len, struct pci_dev *pdev)
823{
824 dma_addr_t mapping;
825 unsigned int i, j = 0, nfrags;
826
827 if (len) {
828 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
829 sgp->len[0] = cpu_to_be32(len);
830 sgp->addr[0] = cpu_to_be64(mapping);
831 j = 1;
832 }
833
834 nfrags = skb_shinfo(skb)->nr_frags;
835 for (i = 0; i < nfrags; i++) {
836 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
837
838 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
839 frag->size, PCI_DMA_TODEVICE);
840 sgp->len[j] = cpu_to_be32(frag->size);
841 sgp->addr[j] = cpu_to_be64(mapping);
842 j ^= 1;
843 if (j == 0)
844 ++sgp;
845 }
846 if (j)
847 sgp->len[j] = 0;
848 return ((nfrags + (len != 0)) * 3) / 2 + j;
849}
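/*
 * The value returned above equals sgl_len(nfrags + (len != 0)): each
 * completed pair of entries occupies 3 flits and the final j accounts for a
 * partially filled sg_ent when the entry count is odd.
 */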
850
851/**
852 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
853 * @adap: the adapter
854 * @q: the Tx queue
855 *
 856 * Ring the doorbell if a Tx queue is asleep. There is a natural race
 857 * where the HW may go to sleep just after we checked; in that case
 858 * the interrupt handler will detect the outstanding TX packet and
 859 * ring the doorbell for us.
860 *
861 * When GTS is disabled we unconditionally ring the doorbell.
862 */
863static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
864{
865#if USE_GTS
866 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
867 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
868 set_bit(TXQ_LAST_PKT_DB, &q->flags);
869 t3_write_reg(adap, A_SG_KDOORBELL,
870 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
871 }
872#else
873 wmb(); /* write descriptors before telling HW */
874 t3_write_reg(adap, A_SG_KDOORBELL,
875 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
876#endif
877}
878
879static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
880{
881#if SGE_NUM_GENBITS == 2
882 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
883#endif
884}
885
886/**
887 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
888 * @ndesc: number of Tx descriptors spanned by the SGL
889 * @skb: the packet corresponding to the WR
890 * @d: first Tx descriptor to be written
891 * @pidx: index of above descriptors
892 * @q: the SGE Tx queue
893 * @sgl: the SGL
894 * @flits: number of flits to the start of the SGL in the first descriptor
895 * @sgl_flits: the SGL size in flits
896 * @gen: the Tx descriptor generation
897 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
898 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
899 *
900 * Write a work request header and an associated SGL. If the SGL is
901 * small enough to fit into one Tx descriptor it has already been written
902 * and we just need to write the WR header. Otherwise we distribute the
903 * SGL across the number of descriptors it spans.
904 */
905static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
906 struct tx_desc *d, unsigned int pidx,
907 const struct sge_txq *q,
908 const struct sg_ent *sgl,
909 unsigned int flits, unsigned int sgl_flits,
910 unsigned int gen, unsigned int wr_hi,
911 unsigned int wr_lo)
912{
913 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
914 struct tx_sw_desc *sd = &q->sdesc[pidx];
915
916 sd->skb = skb;
917 if (need_skb_unmap()) {
918 struct unmap_info *ui = (struct unmap_info *)skb->cb;
919
920 ui->fragidx = 0;
921 ui->addr_idx = 0;
922 ui->sflit = flits;
923 }
924
925 if (likely(ndesc == 1)) {
926 skb->priority = pidx;
927 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
928 V_WR_SGLSFLT(flits)) | wr_hi;
929 wmb();
930 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
931 V_WR_GEN(gen)) | wr_lo;
932 wr_gen2(d, gen);
933 } else {
934 unsigned int ogen = gen;
935 const u64 *fp = (const u64 *)sgl;
936 struct work_request_hdr *wp = wrp;
937
938 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
939 V_WR_SGLSFLT(flits)) | wr_hi;
940
941 while (sgl_flits) {
942 unsigned int avail = WR_FLITS - flits;
943
944 if (avail > sgl_flits)
945 avail = sgl_flits;
946 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
947 sgl_flits -= avail;
948 ndesc--;
949 if (!sgl_flits)
950 break;
951
952 fp += avail;
953 d++;
954 sd++;
955 if (++pidx == q->size) {
956 pidx = 0;
957 gen ^= 1;
958 d = q->desc;
959 sd = q->sdesc;
960 }
961
962 sd->skb = skb;
963 wrp = (struct work_request_hdr *)d;
964 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
965 V_WR_SGLSFLT(1)) | wr_hi;
966 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
967 sgl_flits + 1)) |
968 V_WR_GEN(gen)) | wr_lo;
969 wr_gen2(d, gen);
970 flits = 1;
971 }
972 skb->priority = pidx;
973 wrp->wr_hi |= htonl(F_WR_EOP);
974 wmb();
975 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
976 wr_gen2((struct tx_desc *)wp, ogen);
977 WARN_ON(ndesc != 0);
978 }
979}
980
981/**
982 * write_tx_pkt_wr - write a TX_PKT work request
983 * @adap: the adapter
984 * @skb: the packet to send
985 * @pi: the egress interface
986 * @pidx: index of the first Tx descriptor to write
987 * @gen: the generation value to use
988 * @q: the Tx queue
989 * @ndesc: number of descriptors the packet will occupy
990 * @compl: the value of the COMPL bit to use
991 *
992 * Generate a TX_PKT work request to send the supplied packet.
993 */
994static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
995 const struct port_info *pi,
996 unsigned int pidx, unsigned int gen,
997 struct sge_txq *q, unsigned int ndesc,
998 unsigned int compl)
999{
1000 unsigned int flits, sgl_flits, cntrl, tso_info;
1001 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1002 struct tx_desc *d = &q->desc[pidx];
1003 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1004
1005 cpl->len = htonl(skb->len | 0x80000000);
1006 cntrl = V_TXPKT_INTF(pi->port_id);
1007
1008 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1009 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1010
1011 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1012 if (tso_info) {
1013 int eth_type;
1014 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1015
1016 d->flit[2] = 0;
1017 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1018 hdr->cntrl = htonl(cntrl);
bbe735e4 1019 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1020 CPL_ETH_II : CPL_ETH_II_VLAN;
1021 tso_info |= V_LSO_ETH_TYPE(eth_type) |
eddc9ec5 1022 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
aa8223c7 1023 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1024 hdr->lso_info = htonl(tso_info);
1025 flits = 3;
1026 } else {
1027 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1028 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1029 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1030 cpl->cntrl = htonl(cntrl);
1031
1032 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1033 q->sdesc[pidx].skb = NULL;
1034 if (!skb->data_len)
1035 skb_copy_from_linear_data(skb, &d->flit[2],
1036 skb->len);
1037 else
1038 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1039
1040 flits = (skb->len + 7) / 8 + 2;
1041 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1042 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1043 | F_WR_SOP | F_WR_EOP | compl);
1044 wmb();
1045 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1046 V_WR_TID(q->token));
1047 wr_gen2(d, gen);
1048 kfree_skb(skb);
1049 return;
1050 }
1051
1052 flits = 2;
1053 }
1054
1055 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1056 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1057 if (need_skb_unmap())
1058 ((struct unmap_info *)skb->cb)->len = skb_headlen(skb);
1059
1060 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1061 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1062 htonl(V_WR_TID(q->token)));
1063}
1064
1065/**
 1066 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1067 * @skb: the packet
1068 * @dev: the egress net device
1069 *
1070 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1071 */
1072int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1073{
1074 unsigned int ndesc, pidx, credits, gen, compl;
1075 const struct port_info *pi = netdev_priv(dev);
5fbf816f 1076 struct adapter *adap = pi->adapter;
1077 struct sge_qset *qs = dev2qset(dev);
1078 struct sge_txq *q = &qs->txq[TXQ_ETH];
1079
1080 /*
 1081 * The chip's minimum packet length is 9 octets, but play it safe and
 1082 * reject anything shorter than an Ethernet header.
1083 */
1084 if (unlikely(skb->len < ETH_HLEN)) {
1085 dev_kfree_skb(skb);
1086 return NETDEV_TX_OK;
1087 }
1088
1089 spin_lock(&q->lock);
1090 reclaim_completed_tx(adap, q);
1091
1092 credits = q->size - q->in_use;
1093 ndesc = calc_tx_descs(skb);
1094
1095 if (unlikely(credits < ndesc)) {
1096 if (!netif_queue_stopped(dev)) {
1097 netif_stop_queue(dev);
1098 set_bit(TXQ_ETH, &qs->txq_stopped);
1099 q->stops++;
1100 dev_err(&adap->pdev->dev,
1101 "%s: Tx ring %u full while queue awake!\n",
1102 dev->name, q->cntxt_id & 7);
1103 }
1104 spin_unlock(&q->lock);
1105 return NETDEV_TX_BUSY;
1106 }
1107
1108 q->in_use += ndesc;
1109 if (unlikely(credits - ndesc < q->stop_thres)) {
1110 q->stops++;
1111 netif_stop_queue(dev);
1112 set_bit(TXQ_ETH, &qs->txq_stopped);
1113#if !USE_GTS
1114 if (should_restart_tx(q) &&
1115 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1116 q->restarts++;
1117 netif_wake_queue(dev);
1118 }
1119#endif
1120 }
1121
1122 gen = q->gen;
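	/*
	 * Completion pacing: q->unacked accumulates descriptors and its bit 3,
	 * shifted up to the WR completion bit, requests a Tx completion from
	 * the HW roughly every 8 descriptors before the counter wraps back to
	 * its low 3 bits.
	 */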
1123 q->unacked += ndesc;
1124 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1125 q->unacked &= 7;
1126 pidx = q->pidx;
1127 q->pidx += ndesc;
1128 if (q->pidx >= q->size) {
1129 q->pidx -= q->size;
1130 q->gen ^= 1;
1131 }
1132
1133 /* update port statistics */
1134 if (skb->ip_summed == CHECKSUM_COMPLETE)
1135 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1136 if (skb_shinfo(skb)->gso_size)
1137 qs->port_stats[SGE_PSTAT_TSO]++;
1138 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1139 qs->port_stats[SGE_PSTAT_VLANINS]++;
1140
1141 dev->trans_start = jiffies;
1142 spin_unlock(&q->lock);
1143
1144 /*
1145 * We do not use Tx completion interrupts to free DMAd Tx packets.
 1146 * This is good for performance but means that we rely on new Tx
1147 * packets arriving to run the destructors of completed packets,
1148 * which open up space in their sockets' send queues. Sometimes
1149 * we do not get such new packets causing Tx to stall. A single
1150 * UDP transmitter is a good example of this situation. We have
1151 * a clean up timer that periodically reclaims completed packets
1152 * but it doesn't run often enough (nor do we want it to) to prevent
1153 * lengthy stalls. A solution to this problem is to run the
1154 * destructor early, after the packet is queued but before it's DMAd.
1155 * A cons is that we lie to socket memory accounting, but the amount
1156 * of extra memory is reasonable (limited by the number of Tx
1157 * descriptors), the packets do actually get freed quickly by new
1158 * packets almost always, and for protocols like TCP that wait for
1159 * acks to really free up the data the extra memory is even less.
1160 * On the positive side we run the destructors on the sending CPU
1161 * rather than on a potentially different completing CPU, usually a
1162 * good thing. We also run them without holding our Tx queue lock,
1163 * unlike what reclaim_completed_tx() would otherwise do.
1164 *
1165 * Run the destructor before telling the DMA engine about the packet
1166 * to make sure it doesn't complete and get freed prematurely.
1167 */
1168 if (likely(!skb_shared(skb)))
1169 skb_orphan(skb);
1170
1171 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1172 check_ring_tx_db(adap, q);
1173 return NETDEV_TX_OK;
1174}
1175
1176/**
1177 * write_imm - write a packet into a Tx descriptor as immediate data
1178 * @d: the Tx descriptor to write
1179 * @skb: the packet
1180 * @len: the length of packet data to write as immediate data
1181 * @gen: the generation bit value to write
1182 *
1183 * Writes a packet as immediate data into a Tx descriptor. The packet
1184 * contains a work request at its beginning. We must write the packet
1185 * carefully so the SGE doesn't read accidentally before it's written in
1186 * its entirety.
1187 */
1188static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1189 unsigned int len, unsigned int gen)
1190{
1191 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1192 struct work_request_hdr *to = (struct work_request_hdr *)d;
1193
1194 memcpy(&to[1], &from[1], len - sizeof(*from));
1195 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1196 V_WR_BCNTLFLT(len & 7));
1197 wmb();
1198 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1199 V_WR_LEN((len + 7) / 8));
1200 wr_gen2(d, gen);
1201 kfree_skb(skb);
1202}
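/*
 * Ordering note: the descriptor body and wr_hi are written first, the wmb()
 * makes them visible, and only then is wr_lo written with the generation bit
 * (wr_gen2() also stamps the last flit when SGE_NUM_GENBITS == 2), so the SGE
 * never sees a valid generation value ahead of the data it guards.
 */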
1203
1204/**
1205 * check_desc_avail - check descriptor availability on a send queue
1206 * @adap: the adapter
1207 * @q: the send queue
1208 * @skb: the packet needing the descriptors
1209 * @ndesc: the number of Tx descriptors needed
1210 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1211 *
1212 * Checks if the requested number of Tx descriptors is available on an
1213 * SGE send queue. If the queue is already suspended or not enough
1214 * descriptors are available the packet is queued for later transmission.
1215 * Must be called with the Tx queue locked.
1216 *
1217 * Returns 0 if enough descriptors are available, 1 if there aren't
1218 * enough descriptors and the packet has been queued, and 2 if the caller
1219 * needs to retry because there weren't enough descriptors at the
1220 * beginning of the call but some freed up in the mean time.
1221 */
1222static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1223 struct sk_buff *skb, unsigned int ndesc,
1224 unsigned int qid)
1225{
1226 if (unlikely(!skb_queue_empty(&q->sendq))) {
1227 addq_exit:__skb_queue_tail(&q->sendq, skb);
1228 return 1;
1229 }
1230 if (unlikely(q->size - q->in_use < ndesc)) {
1231 struct sge_qset *qs = txq_to_qset(q, qid);
1232
1233 set_bit(qid, &qs->txq_stopped);
1234 smp_mb__after_clear_bit();
1235
1236 if (should_restart_tx(q) &&
1237 test_and_clear_bit(qid, &qs->txq_stopped))
1238 return 2;
1239
1240 q->stops++;
1241 goto addq_exit;
1242 }
1243 return 0;
1244}
1245
1246/**
1247 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1248 * @q: the SGE control Tx queue
1249 *
1250 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1251 * that send only immediate data (presently just the control queues) and
1252 * thus do not have any sk_buffs to release.
1253 */
1254static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1255{
1256 unsigned int reclaim = q->processed - q->cleaned;
1257
1258 q->in_use -= reclaim;
1259 q->cleaned += reclaim;
1260}
1261
1262static inline int immediate(const struct sk_buff *skb)
1263{
1264 return skb->len <= WR_LEN && !skb->data_len;
1265}
1266
1267/**
1268 * ctrl_xmit - send a packet through an SGE control Tx queue
1269 * @adap: the adapter
1270 * @q: the control queue
1271 * @skb: the packet
1272 *
1273 * Send a packet through an SGE control Tx queue. Packets sent through
1274 * a control queue must fit entirely as immediate data in a single Tx
1275 * descriptor and have no page fragments.
1276 */
1277static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1278 struct sk_buff *skb)
1279{
1280 int ret;
1281 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1282
1283 if (unlikely(!immediate(skb))) {
1284 WARN_ON(1);
1285 dev_kfree_skb(skb);
1286 return NET_XMIT_SUCCESS;
1287 }
1288
1289 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1290 wrp->wr_lo = htonl(V_WR_TID(q->token));
1291
1292 spin_lock(&q->lock);
1293 again:reclaim_completed_tx_imm(q);
1294
1295 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1296 if (unlikely(ret)) {
1297 if (ret == 1) {
1298 spin_unlock(&q->lock);
1299 return NET_XMIT_CN;
1300 }
1301 goto again;
1302 }
1303
1304 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1305
1306 q->in_use++;
1307 if (++q->pidx >= q->size) {
1308 q->pidx = 0;
1309 q->gen ^= 1;
1310 }
1311 spin_unlock(&q->lock);
1312 wmb();
1313 t3_write_reg(adap, A_SG_KDOORBELL,
1314 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1315 return NET_XMIT_SUCCESS;
1316}
1317
1318/**
1319 * restart_ctrlq - restart a suspended control queue
 1320 * @qs: the queue set containing the control queue
1321 *
1322 * Resumes transmission on a suspended Tx control queue.
1323 */
1324static void restart_ctrlq(unsigned long data)
1325{
1326 struct sk_buff *skb;
1327 struct sge_qset *qs = (struct sge_qset *)data;
1328 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1329 const struct port_info *pi = netdev_priv(qs->netdev);
1330 struct adapter *adap = pi->adapter;
1331
1332 spin_lock(&q->lock);
1333 again:reclaim_completed_tx_imm(q);
1334
1335 while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) {
1336
1337 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1338
1339 if (++q->pidx >= q->size) {
1340 q->pidx = 0;
1341 q->gen ^= 1;
1342 }
1343 q->in_use++;
1344 }
1345
1346 if (!skb_queue_empty(&q->sendq)) {
1347 set_bit(TXQ_CTRL, &qs->txq_stopped);
1348 smp_mb__after_clear_bit();
1349
1350 if (should_restart_tx(q) &&
1351 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1352 goto again;
1353 q->stops++;
1354 }
1355
1356 spin_unlock(&q->lock);
1357 t3_write_reg(adap, A_SG_KDOORBELL,
1358 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1359}
1360
1361/*
1362 * Send a management message through control queue 0
1363 */
1364int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1365{
1366 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1367}
1368
1369/**
1370 * deferred_unmap_destructor - unmap a packet when it is freed
1371 * @skb: the packet
1372 *
1373 * This is the packet destructor used for Tx packets that need to remain
1374 * mapped until they are freed rather than until their Tx descriptors are
1375 * freed.
1376 */
1377static void deferred_unmap_destructor(struct sk_buff *skb)
1378{
1379 int i;
1380 const dma_addr_t *p;
1381 const struct skb_shared_info *si;
1382 const struct deferred_unmap_info *dui;
1383 const struct unmap_info *ui = (struct unmap_info *)skb->cb;
1384
1385 dui = (struct deferred_unmap_info *)skb->head;
1386 p = dui->addr;
1387
1388 if (ui->len)
1389 pci_unmap_single(dui->pdev, *p++, ui->len, PCI_DMA_TODEVICE);
1390
1391 si = skb_shinfo(skb);
1392 for (i = 0; i < si->nr_frags; i++)
1393 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1394 PCI_DMA_TODEVICE);
1395}
1396
1397static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1398 const struct sg_ent *sgl, int sgl_flits)
1399{
1400 dma_addr_t *p;
1401 struct deferred_unmap_info *dui;
1402
1403 dui = (struct deferred_unmap_info *)skb->head;
1404 dui->pdev = pdev;
1405 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1406 *p++ = be64_to_cpu(sgl->addr[0]);
1407 *p++ = be64_to_cpu(sgl->addr[1]);
1408 }
1409 if (sgl_flits)
1410 *p = be64_to_cpu(sgl->addr[0]);
1411}
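/*
 * Each full 3-flit sg_ent holds two DMA addresses, so the loop above harvests
 * addr[0] and addr[1] per entry and a leftover partial entry contributes one
 * final address; deferred_unmap_destructor() replays this flat list with
 * pci_unmap_single()/pci_unmap_page() when the skb is freed.
 */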
1412
1413/**
1414 * write_ofld_wr - write an offload work request
1415 * @adap: the adapter
1416 * @skb: the packet to send
1417 * @q: the Tx queue
1418 * @pidx: index of the first Tx descriptor to write
1419 * @gen: the generation value to use
1420 * @ndesc: number of descriptors the packet will occupy
1421 *
1422 * Write an offload work request to send the supplied packet. The packet
1423 * data already carry the work request with most fields populated.
1424 */
1425static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1426 struct sge_txq *q, unsigned int pidx,
1427 unsigned int gen, unsigned int ndesc)
1428{
1429 unsigned int sgl_flits, flits;
1430 struct work_request_hdr *from;
1431 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1432 struct tx_desc *d = &q->desc[pidx];
1433
1434 if (immediate(skb)) {
1435 q->sdesc[pidx].skb = NULL;
1436 write_imm(d, skb, skb->len, gen);
1437 return;
1438 }
1439
1440 /* Only TX_DATA builds SGLs */
1441
1442 from = (struct work_request_hdr *)skb->data;
1443 memcpy(&d->flit[1], &from[1],
1444 skb_transport_offset(skb) - sizeof(*from));
4d22de3e 1445
ea2ae17d 1446 flits = skb_transport_offset(skb) / 8;
4d22de3e 1447 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
9c70220b 1448 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
27a884dc 1449 skb->tail - skb->transport_header,
4d22de3e 1450 adap->pdev);
1451 if (need_skb_unmap()) {
1452 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1453 skb->destructor = deferred_unmap_destructor;
9c70220b 1454 ((struct unmap_info *)skb->cb)->len = (skb->tail -
27a884dc 1455 skb->transport_header);
99d7cf30 1456 }
1457
1458 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1459 gen, from->wr_hi, from->wr_lo);
1460}
1461
1462/**
1463 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1464 * @skb: the packet
1465 *
1466 * Returns the number of Tx descriptors needed for the given offload
1467 * packet. These packets are already fully constructed.
1468 */
1469static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1470{
1471 unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;
1472
1473 if (skb->len <= WR_LEN && cnt == 0)
1474 return 1; /* packet fits as immediate data */
1475
ea2ae17d 1476 flits = skb_transport_offset(skb) / 8; /* headers */
27a884dc 1477 if (skb->tail != skb->transport_header)
1478 cnt++;
1479 return flits_to_desc(flits + sgl_len(cnt));
1480}
1481
1482/**
1483 * ofld_xmit - send a packet through an offload queue
1484 * @adap: the adapter
1485 * @q: the Tx offload queue
1486 * @skb: the packet
1487 *
1488 * Send an offload packet through an SGE offload queue.
1489 */
1490static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1491 struct sk_buff *skb)
1492{
1493 int ret;
1494 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1495
1496 spin_lock(&q->lock);
1497 again:reclaim_completed_tx(adap, q);
1498
1499 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1500 if (unlikely(ret)) {
1501 if (ret == 1) {
1502 skb->priority = ndesc; /* save for restart */
1503 spin_unlock(&q->lock);
1504 return NET_XMIT_CN;
1505 }
1506 goto again;
1507 }
1508
1509 gen = q->gen;
1510 q->in_use += ndesc;
1511 pidx = q->pidx;
1512 q->pidx += ndesc;
1513 if (q->pidx >= q->size) {
1514 q->pidx -= q->size;
1515 q->gen ^= 1;
1516 }
1517 spin_unlock(&q->lock);
1518
1519 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1520 check_ring_tx_db(adap, q);
1521 return NET_XMIT_SUCCESS;
1522}
1523
1524/**
1525 * restart_offloadq - restart a suspended offload queue
 1526 * @qs: the queue set containing the offload queue
1527 *
1528 * Resumes transmission on a suspended Tx offload queue.
1529 */
1530static void restart_offloadq(unsigned long data)
1531{
1532 struct sk_buff *skb;
1533 struct sge_qset *qs = (struct sge_qset *)data;
1534 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1535 const struct port_info *pi = netdev_priv(qs->netdev);
1536 struct adapter *adap = pi->adapter;
1537
1538 spin_lock(&q->lock);
1539 again:reclaim_completed_tx(adap, q);
1540
1541 while ((skb = skb_peek(&q->sendq)) != NULL) {
1542 unsigned int gen, pidx;
1543 unsigned int ndesc = skb->priority;
1544
1545 if (unlikely(q->size - q->in_use < ndesc)) {
1546 set_bit(TXQ_OFLD, &qs->txq_stopped);
1547 smp_mb__after_clear_bit();
1548
1549 if (should_restart_tx(q) &&
1550 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1551 goto again;
1552 q->stops++;
1553 break;
1554 }
1555
1556 gen = q->gen;
1557 q->in_use += ndesc;
1558 pidx = q->pidx;
1559 q->pidx += ndesc;
1560 if (q->pidx >= q->size) {
1561 q->pidx -= q->size;
1562 q->gen ^= 1;
1563 }
1564 __skb_unlink(skb, &q->sendq);
1565 spin_unlock(&q->lock);
1566
1567 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1568 spin_lock(&q->lock);
1569 }
1570 spin_unlock(&q->lock);
1571
1572#if USE_GTS
1573 set_bit(TXQ_RUNNING, &q->flags);
1574 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1575#endif
1576 t3_write_reg(adap, A_SG_KDOORBELL,
1577 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1578}
1579
1580/**
1581 * queue_set - return the queue set a packet should use
1582 * @skb: the packet
1583 *
1584 * Maps a packet to the SGE queue set it should use. The desired queue
1585 * set is carried in bits 1-3 in the packet's priority.
1586 */
1587static inline int queue_set(const struct sk_buff *skb)
1588{
1589 return skb->priority >> 1;
1590}
1591
1592/**
1593 * is_ctrl_pkt - return whether an offload packet is a control packet
1594 * @skb: the packet
1595 *
1596 * Determines whether an offload packet should use an OFLD or a CTRL
1597 * Tx queue. This is indicated by bit 0 in the packet's priority.
1598 */
1599static inline int is_ctrl_pkt(const struct sk_buff *skb)
1600{
1601 return skb->priority & 1;
1602}
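/*
 * Example of the priority encoding used by the two helpers above: an offload
 * skb with skb->priority == 5 (binary 101) is a control packet (bit 0 set)
 * directed at queue set 2 (bits 1-3 == 2).
 */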
1603
1604/**
1605 * t3_offload_tx - send an offload packet
1606 * @tdev: the offload device to send to
1607 * @skb: the packet
1608 *
1609 * Sends an offload packet. We use the packet priority to select the
1610 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1611 * should be sent as regular or control, bits 1-3 select the queue set.
1612 */
1613int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1614{
1615 struct adapter *adap = tdev2adap(tdev);
1616 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1617
1618 if (unlikely(is_ctrl_pkt(skb)))
1619 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1620
1621 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1622}
1623
1624/**
1625 * offload_enqueue - add an offload packet to an SGE offload receive queue
1626 * @q: the SGE response queue
1627 * @skb: the packet
1628 *
1629 * Add a new offload packet to an SGE response queue's offload packet
1630 * queue. If the packet is the first on the queue it schedules the RX
1631 * softirq to process the queue.
1632 */
1633static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1634{
1635 skb->next = skb->prev = NULL;
1636 if (q->rx_tail)
1637 q->rx_tail->next = skb;
1638 else {
1639 struct sge_qset *qs = rspq_to_qset(q);
1640
1641 if (__netif_rx_schedule_prep(qs->netdev))
1642 __netif_rx_schedule(qs->netdev);
1643 q->rx_head = skb;
1644 }
1645 q->rx_tail = skb;
1646}
1647
1648/**
1649 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1650 * @tdev: the offload device that will be receiving the packets
1651 * @q: the SGE response queue that assembled the bundle
1652 * @skbs: the partial bundle
1653 * @n: the number of packets in the bundle
1654 *
1655 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1656 */
1657static inline void deliver_partial_bundle(struct t3cdev *tdev,
1658 struct sge_rspq *q,
1659 struct sk_buff *skbs[], int n)
1660{
1661 if (n) {
1662 q->offload_bundles++;
1663 tdev->recv(tdev, skbs, n);
1664 }
1665}
1666
1667/**
1668 * ofld_poll - NAPI handler for offload packets in interrupt mode
1669 * @dev: the network device doing the polling
1670 * @budget: polling budget
1671 *
1672 * The NAPI handler for offload packets when a response queue is serviced
1673 * by the hard interrupt handler, i.e., when it's operating in non-polling
1674 * mode. Creates small packet batches and sends them through the offload
1675 * receive handler. Batches need to be of modest size as we do prefetches
1676 * on the packets in each.
1677 */
1678static int ofld_poll(struct net_device *dev, int *budget)
1679{
1680 const struct port_info *pi = netdev_priv(dev);
1681 struct adapter *adapter = pi->adapter;
1682 struct sge_qset *qs = dev2qset(dev);
1683 struct sge_rspq *q = &qs->rspq;
1684 int work_done, limit = min(*budget, dev->quota), avail = limit;
1685
1686 while (avail) {
1687 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1688 int ngathered;
1689
1690 spin_lock_irq(&q->lock);
1691 head = q->rx_head;
1692 if (!head) {
1693 work_done = limit - avail;
1694 *budget -= work_done;
1695 dev->quota -= work_done;
1696 __netif_rx_complete(dev);
1697 spin_unlock_irq(&q->lock);
1698 return 0;
1699 }
1700
1701 tail = q->rx_tail;
1702 q->rx_head = q->rx_tail = NULL;
1703 spin_unlock_irq(&q->lock);
1704
1705 for (ngathered = 0; avail && head; avail--) {
1706 prefetch(head->data);
1707 skbs[ngathered] = head;
1708 head = head->next;
1709 skbs[ngathered]->next = NULL;
1710 if (++ngathered == RX_BUNDLE_SIZE) {
1711 q->offload_bundles++;
1712 adapter->tdev.recv(&adapter->tdev, skbs,
1713 ngathered);
1714 ngathered = 0;
1715 }
1716 }
1717 if (head) { /* splice remaining packets back onto Rx queue */
1718 spin_lock_irq(&q->lock);
1719 tail->next = q->rx_head;
1720 if (!q->rx_head)
1721 q->rx_tail = tail;
1722 q->rx_head = head;
1723 spin_unlock_irq(&q->lock);
1724 }
1725 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1726 }
1727 work_done = limit - avail;
1728 *budget -= work_done;
1729 dev->quota -= work_done;
1730 return 1;
1731}
1732
1733/**
1734 * rx_offload - process a received offload packet
1735 * @tdev: the offload device receiving the packet
1736 * @rq: the response queue that received the packet
1737 * @skb: the packet
1738 * @rx_gather: a gather list of packets if we are building a bundle
1739 * @gather_idx: index of the next available slot in the bundle
1740 *
 1741 * Process an ingress offload packet and add it to the offload ingress
1742 * queue. Returns the index of the next available slot in the bundle.
1743 */
1744static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1745 struct sk_buff *skb, struct sk_buff *rx_gather[],
1746 unsigned int gather_idx)
1747{
1748 rq->offload_pkts++;
459a98ed 1749 skb_reset_mac_header(skb);
c1d2bbe1 1750 skb_reset_network_header(skb);
badff6d0 1751 skb_reset_transport_header(skb);
1752
1753 if (rq->polling) {
1754 rx_gather[gather_idx++] = skb;
1755 if (gather_idx == RX_BUNDLE_SIZE) {
1756 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1757 gather_idx = 0;
1758 rq->offload_bundles++;
1759 }
1760 } else
1761 offload_enqueue(rq, skb);
1762
1763 return gather_idx;
1764}
1765
1766/**
1767 * restart_tx - check whether to restart suspended Tx queues
1768 * @qs: the queue set to resume
1769 *
1770 * Restarts suspended Tx queues of an SGE queue set if they have enough
1771 * free resources to resume operation.
1772 */
1773static void restart_tx(struct sge_qset *qs)
1774{
1775 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1776 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1777 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1778 qs->txq[TXQ_ETH].restarts++;
1779 if (netif_running(qs->netdev))
1780 netif_wake_queue(qs->netdev);
1781 }
1782
1783 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1784 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1785 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1786 qs->txq[TXQ_OFLD].restarts++;
1787 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1788 }
1789 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1790 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1791 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1792 qs->txq[TXQ_CTRL].restarts++;
1793 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1794 }
1795}
1796
1797/**
1798 * rx_eth - process an ingress ethernet packet
1799 * @adap: the adapter
1800 * @rq: the response queue that received the packet
1801 * @skb: the packet
1802 * @pad: amount of padding at the start of the buffer
1803 *
1804 * Process an ingress ethernet packet and deliver it to the stack.
1805 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1806 * if it was immediate data in a response.
1807 */
1808static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1809 struct sk_buff *skb, int pad)
1810{
1811 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1812 struct port_info *pi;
1813
4d22de3e 1814 skb_pull(skb, sizeof(*p) + pad);
4c13eb66 1815 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
e360b562 1816 skb->dev->last_rx = jiffies;
4d22de3e
DLR
1817 pi = netdev_priv(skb->dev);
1818 if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
1819 !p->fragment) {
1820 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1821 skb->ip_summed = CHECKSUM_UNNECESSARY;
1822 } else
1823 skb->ip_summed = CHECKSUM_NONE;
1824
1825 if (unlikely(p->vlan_valid)) {
1826 struct vlan_group *grp = pi->vlan_grp;
1827
1828 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1829 if (likely(grp))
1830 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1831 rq->polling);
1832 else
1833 dev_kfree_skb_any(skb);
1834 } else if (rq->polling)
1835 netif_receive_skb(skb);
1836 else
1837 netif_rx(skb);
1838}
1839
1840/**
1841 * handle_rsp_cntrl_info - handles control information in a response
1842 * @qs: the queue set corresponding to the response
1843 * @flags: the response control flags
4d22de3e
DLR
1844 *
1845 * Handles the control information of an SGE response, such as GTS
1846 * indications and completion credits for the queue set's Tx queues.
6195c71d 1847 * HW coalesces credits, we don't do any extra SW coalescing.
4d22de3e 1848 */
6195c71d 1849static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
4d22de3e
DLR
1850{
1851 unsigned int credits;
1852
1853#if USE_GTS
1854 if (flags & F_RSPD_TXQ0_GTS)
1855 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1856#endif
1857
4d22de3e
DLR
1858 credits = G_RSPD_TXQ0_CR(flags);
1859 if (credits)
1860 qs->txq[TXQ_ETH].processed += credits;
1861
6195c71d
DLR
1862 credits = G_RSPD_TXQ2_CR(flags);
1863 if (credits)
1864 qs->txq[TXQ_CTRL].processed += credits;
1865
4d22de3e
DLR
1866# if USE_GTS
1867 if (flags & F_RSPD_TXQ1_GTS)
1868 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1869# endif
6195c71d
DLR
1870 credits = G_RSPD_TXQ1_CR(flags);
1871 if (credits)
1872 qs->txq[TXQ_OFLD].processed += credits;
4d22de3e
DLR
1873}
1874
1875/**
1876 * check_ring_db - check if we need to ring any doorbells
1877 * @adap: the adapter
1878 * @qs: the queue set whose Tx queues are to be examined
1879 * @sleeping: indicates which Tx queue sent GTS
1880 *
1881 * Checks if some of a queue set's Tx queues need to ring their doorbells
1882 * to resume transmission after idling while they still have unprocessed
1883 * descriptors.
1884 */
1885static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1886 unsigned int sleeping)
1887{
1888 if (sleeping & F_RSPD_TXQ0_GTS) {
1889 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1890
1891 if (txq->cleaned + txq->in_use != txq->processed &&
1892 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1893 set_bit(TXQ_RUNNING, &txq->flags);
1894 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1895 V_EGRCNTX(txq->cntxt_id));
1896 }
1897 }
1898
1899 if (sleeping & F_RSPD_TXQ1_GTS) {
1900 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1901
1902 if (txq->cleaned + txq->in_use != txq->processed &&
1903 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1904 set_bit(TXQ_RUNNING, &txq->flags);
1905 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1906 V_EGRCNTX(txq->cntxt_id));
1907 }
1908 }
1909}
1910
1911/**
1912 * is_new_response - check if a response is newly written
1913 * @r: the response descriptor
1914 * @q: the response queue
1915 *
1916 * Returns true if a response descriptor contains an as yet unprocessed
1917 * response.
1918 */
1919static inline int is_new_response(const struct rsp_desc *r,
1920 const struct sge_rspq *q)
1921{
1922 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1923}
1924
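/*
 * Editorial sketch, not part of the driver: is_new_response() above relies
 * on a generation bit that the hardware writes into each response and that
 * the software copy (q->gen) toggles every time the consumer index wraps,
 * so new entries can be detected without a shared head pointer.  The
 * self-contained consumer below illustrates the same idea with made-up
 * names (demo_*); memory-ordering/barrier details are deliberately omitted.
 */
struct demo_entry {
	unsigned int gen;		/* generation written by the producer */
	unsigned int data;
};

struct demo_ring {
	struct demo_entry *desc;	/* ring of 'size' entries */
	unsigned int size;
	unsigned int cidx;		/* consumer index */
	unsigned int gen;		/* generation we expect, starts at 1 */
};

static int demo_consume(struct demo_ring *r, unsigned int *out)
{
	struct demo_entry *e = &r->desc[r->cidx];

	if (e->gen != r->gen)		/* not yet written for this pass */
		return 0;
	*out = e->data;
	if (++r->cidx == r->size) {	/* wrapped: expect the flipped generation */
		r->cidx = 0;
		r->gen ^= 1;
	}
	return 1;
}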
1925#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1926#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1927 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1928 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1929 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1930
1931/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1932#define NOMEM_INTR_DELAY 2500
1933
1934/**
1935 * process_responses - process responses from an SGE response queue
1936 * @adap: the adapter
1937 * @qs: the queue set to which the response queue belongs
1938 * @budget: how many responses can be processed in this round
1939 *
1940 * Process responses from an SGE response queue up to the supplied budget.
1941 * Responses include received packets as well as credits and other events
1942 * for the queues that belong to the response queue's queue set.
1943 * A negative budget is effectively unlimited.
1944 *
1945 * Additionally choose the interrupt holdoff time for the next interrupt
1946 * on this queue. If the system is under memory shortage use a fairly
1947 * long delay to help recovery.
1948 */
1949static int process_responses(struct adapter *adap, struct sge_qset *qs,
1950 int budget)
1951{
1952 struct sge_rspq *q = &qs->rspq;
1953 struct rsp_desc *r = &q->desc[q->cidx];
1954 int budget_left = budget;
6195c71d 1955 unsigned int sleeping = 0;
4d22de3e
DLR
1956 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
1957 int ngathered = 0;
1958
1959 q->next_holdoff = q->holdoff_tmr;
1960
1961 while (likely(budget_left && is_new_response(r, q))) {
e0994eb1 1962 int eth, ethpad = 2;
4d22de3e
DLR
1963 struct sk_buff *skb = NULL;
1964 u32 len, flags = ntohl(r->flags);
1965 u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
1966
1967 eth = r->rss_hdr.opcode == CPL_RX_PKT;
1968
1969 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
1970 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
1971 if (!skb)
1972 goto no_mem;
1973
1974 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
1975 skb->data[0] = CPL_ASYNC_NOTIF;
1976 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
1977 q->async_notif++;
1978 } else if (flags & F_RSPD_IMM_DATA_VALID) {
1979 skb = get_imm_packet(r);
1980 if (unlikely(!skb)) {
cf992af5 1981no_mem:
4d22de3e
DLR
1982 q->next_holdoff = NOMEM_INTR_DELAY;
1983 q->nomem++;
1984 /* consume one credit since we tried */
1985 budget_left--;
1986 break;
1987 }
1988 q->imm_data++;
e0994eb1 1989 ethpad = 0;
4d22de3e 1990 } else if ((len = ntohl(r->len_cq)) != 0) {
cf992af5 1991 struct sge_fl *fl;
e0994eb1 1992
cf992af5
DLR
1993 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
1994 if (fl->use_pages) {
1995 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
e0994eb1 1996
cf992af5
DLR
1997 prefetch(addr);
1998#if L1_CACHE_BYTES < 128
1999 prefetch(addr + L1_CACHE_BYTES);
2000#endif
e0994eb1
DLR
2001 __refill_fl(adap, fl);
2002
cf992af5
DLR
2003 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
2004 eth ? SGE_RX_DROP_THRES : 0);
2005 } else
e0994eb1
DLR
2006 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2007 eth ? SGE_RX_DROP_THRES : 0);
cf992af5
DLR
2008 if (unlikely(!skb)) {
2009 if (!eth)
2010 goto no_mem;
2011 q->rx_drops++;
2012 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2013 __skb_pull(skb, 2);
4d22de3e 2014
4d22de3e
DLR
2015 if (++fl->cidx == fl->size)
2016 fl->cidx = 0;
2017 } else
2018 q->pure_rsps++;
2019
2020 if (flags & RSPD_CTRL_MASK) {
2021 sleeping |= flags & RSPD_GTS_MASK;
6195c71d 2022 handle_rsp_cntrl_info(qs, flags);
4d22de3e
DLR
2023 }
2024
2025 r++;
2026 if (unlikely(++q->cidx == q->size)) {
2027 q->cidx = 0;
2028 q->gen ^= 1;
2029 r = q->desc;
2030 }
2031 prefetch(r);
2032
2033 if (++q->credits >= (q->size / 4)) {
2034 refill_rspq(adap, q, q->credits);
2035 q->credits = 0;
2036 }
2037
cf992af5 2038 if (likely(skb != NULL)) {
4d22de3e
DLR
2039 if (eth)
2040 rx_eth(adap, q, skb, ethpad);
2041 else {
cf992af5
DLR
2042 /* Preserve the RSS info in csum & priority */
2043 skb->csum = rss_hi;
2044 skb->priority = rss_lo;
2045 ngathered = rx_offload(&adap->tdev, q, skb,
2046 offload_skbs,
e0994eb1 2047 ngathered);
4d22de3e
DLR
2048 }
2049 }
4d22de3e
DLR
2050 --budget_left;
2051 }
2052
4d22de3e
DLR
2053 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2054 if (sleeping)
2055 check_ring_db(adap, qs, sleeping);
2056
2057 smp_mb(); /* commit Tx queue .processed updates */
2058 if (unlikely(qs->txq_stopped != 0))
2059 restart_tx(qs);
2060
2061 budget -= budget_left;
2062 return budget;
2063}
2064
2065static inline int is_pure_response(const struct rsp_desc *r)
2066{
2067 u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2068
2069 return (n | r->len_cq) == 0;
2070}
2071
2072/**
2073 * napi_rx_handler - the NAPI handler for Rx processing
2074 * @dev: the net device
2075 * @budget: how many packets we can process in this round
2076 *
2077 * Handler for new data events when using NAPI.
2078 */
2079static int napi_rx_handler(struct net_device *dev, int *budget)
2080{
5fbf816f
DLR
2081 const struct port_info *pi = netdev_priv(dev);
2082 struct adapter *adap = pi->adapter;
4d22de3e
DLR
2083 struct sge_qset *qs = dev2qset(dev);
2084 int effective_budget = min(*budget, dev->quota);
2085
2086 int work_done = process_responses(adap, qs, effective_budget);
2087 *budget -= work_done;
2088 dev->quota -= work_done;
2089
2090 if (work_done >= effective_budget)
2091 return 1;
2092
2093 netif_rx_complete(dev);
2094
2095 /*
2096 * Because we don't atomically flush the following write it is
2097 * possible that in very rare cases it can reach the device in a way
2098 * that races with a new response being written plus an error interrupt
2099 * causing the NAPI interrupt handler below to return unhandled status
2100 * to the OS. To protect against this would require flushing the write
2101 * and doing both the write and the flush with interrupts off. Way too
2102 * expensive and unjustifiable given the rarity of the race.
2103 *
2104 * The race cannot happen at all with MSI-X.
2105 */
2106 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2107 V_NEWTIMER(qs->rspq.next_holdoff) |
2108 V_NEWINDEX(qs->rspq.cidx));
2109 return 0;
2110}
2111
2112/*
2113 * Returns true if the device is already scheduled for polling.
2114 */
2115static inline int napi_is_scheduled(struct net_device *dev)
2116{
2117 return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
2118}
2119
2120/**
2121 * process_pure_responses - process pure responses from a response queue
2122 * @adap: the adapter
2123 * @qs: the queue set owning the response queue
2124 * @r: the first pure response to process
2125 *
2126 * A simpler version of process_responses() that handles only pure (i.e.,
2127 * non data-carrying) responses. Such responses are too light-weight to
2128 * justify calling a softirq under NAPI, so we handle them specially in
2129 * the interrupt handler. The function is called with a pointer to a
2130 * response, which the caller must ensure is a valid pure response.
2131 *
2132 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2133 */
2134static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2135 struct rsp_desc *r)
2136{
2137 struct sge_rspq *q = &qs->rspq;
6195c71d 2138 unsigned int sleeping = 0;
4d22de3e
DLR
2139
2140 do {
2141 u32 flags = ntohl(r->flags);
2142
2143 r++;
2144 if (unlikely(++q->cidx == q->size)) {
2145 q->cidx = 0;
2146 q->gen ^= 1;
2147 r = q->desc;
2148 }
2149 prefetch(r);
2150
2151 if (flags & RSPD_CTRL_MASK) {
2152 sleeping |= flags & RSPD_GTS_MASK;
6195c71d 2153 handle_rsp_cntrl_info(qs, flags);
4d22de3e
DLR
2154 }
2155
2156 q->pure_rsps++;
2157 if (++q->credits >= (q->size / 4)) {
2158 refill_rspq(adap, q, q->credits);
2159 q->credits = 0;
2160 }
2161 } while (is_new_response(r, q) && is_pure_response(r));
2162
4d22de3e
DLR
2163 if (sleeping)
2164 check_ring_db(adap, qs, sleeping);
2165
2166 smp_mb(); /* commit Tx queue .processed updates */
2167 if (unlikely(qs->txq_stopped != 0))
2168 restart_tx(qs);
2169
2170 return is_new_response(r, q);
2171}
2172
2173/**
2174 * handle_responses - decide what to do with new responses in NAPI mode
2175 * @adap: the adapter
2176 * @q: the response queue
2177 *
2178 * This is used by the NAPI interrupt handlers to decide what to do with
2179 * new SGE responses. If there are no new responses it returns -1. If
2180 * there are new responses and they are pure (i.e., non-data carrying)
2181 * it handles them straight in hard interrupt context as they are very
2182 * cheap and don't deliver any packets. Finally, if there are any data
2183 * signaling responses it schedules the NAPI handler. Returns 1 if it
2184 * schedules NAPI, 0 if all new responses were pure.
2185 *
2186 * The caller must ascertain NAPI is not already running.
2187 */
2188static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2189{
2190 struct sge_qset *qs = rspq_to_qset(q);
2191 struct rsp_desc *r = &q->desc[q->cidx];
2192
2193 if (!is_new_response(r, q))
2194 return -1;
2195 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2196 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2197 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2198 return 0;
2199 }
2200 if (likely(__netif_rx_schedule_prep(qs->netdev)))
2201 __netif_rx_schedule(qs->netdev);
2202 return 1;
2203}
2204
2205/*
2206 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2207 * (i.e., response queue serviced in hard interrupt).
2208 */
2209irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2210{
2211 struct sge_qset *qs = cookie;
5fbf816f
DLR
2212 const struct port_info *pi = netdev_priv(qs->netdev);
2213 struct adapter *adap = pi->adapter;
4d22de3e
DLR
2214 struct sge_rspq *q = &qs->rspq;
2215
2216 spin_lock(&q->lock);
2217 if (process_responses(adap, qs, -1) == 0)
2218 q->unhandled_irqs++;
2219 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2220 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2221 spin_unlock(&q->lock);
2222 return IRQ_HANDLED;
2223}
2224
2225/*
2226 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2227 * (i.e., response queue serviced by NAPI polling).
2228 */
2229irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2230{
2231 struct sge_qset *qs = cookie;
5fbf816f
DLR
2232 const struct port_info *pi = netdev_priv(qs->netdev);
2233 struct adapter *adap = pi->adapter;
4d22de3e
DLR
2234 struct sge_rspq *q = &qs->rspq;
2235
2236 spin_lock(&q->lock);
4d22de3e
DLR
2237
2238 if (handle_responses(adap, q) < 0)
2239 q->unhandled_irqs++;
2240 spin_unlock(&q->lock);
2241 return IRQ_HANDLED;
2242}
2243
2244/*
2245 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2246 * SGE response queues as well as error and other async events as they all use
2247 * the same MSI vector. We use one SGE response queue per port in this mode
2248 * and protect all response queues with queue 0's lock.
2249 */
2250static irqreturn_t t3_intr_msi(int irq, void *cookie)
2251{
2252 int new_packets = 0;
2253 struct adapter *adap = cookie;
2254 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2255
2256 spin_lock(&q->lock);
2257
2258 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2259 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2260 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2261 new_packets = 1;
2262 }
2263
2264 if (adap->params.nports == 2 &&
2265 process_responses(adap, &adap->sge.qs[1], -1)) {
2266 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2267
2268 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2269 V_NEWTIMER(q1->next_holdoff) |
2270 V_NEWINDEX(q1->cidx));
2271 new_packets = 1;
2272 }
2273
2274 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2275 q->unhandled_irqs++;
2276
2277 spin_unlock(&q->lock);
2278 return IRQ_HANDLED;
2279}
2280
2281static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
2282{
2283 if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
2284 if (likely(__netif_rx_schedule_prep(dev)))
2285 __netif_rx_schedule(dev);
2286 return 1;
2287 }
2288 return 0;
2289}
2290
2291/*
2292 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2293 * by NAPI polling). Handles data events from SGE response queues as well as
2294 * error and other async events as they all use the same MSI vector. We use
2295 * one SGE response queue per port in this mode and protect all response
2296 * queues with queue 0's lock.
2297 */
2298irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2299{
2300 int new_packets;
2301 struct adapter *adap = cookie;
2302 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2303
2304 spin_lock(&q->lock);
2305
2306 new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
2307 if (adap->params.nports == 2)
2308 new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
2309 &adap->sge.qs[1].rspq);
2310 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2311 q->unhandled_irqs++;
2312
2313 spin_unlock(&q->lock);
2314 return IRQ_HANDLED;
2315}
2316
2317/*
2318 * A helper function that processes responses and issues GTS.
2319 */
2320static inline int process_responses_gts(struct adapter *adap,
2321 struct sge_rspq *rq)
2322{
2323 int work;
2324
2325 work = process_responses(adap, rspq_to_qset(rq), -1);
2326 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2327 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2328 return work;
2329}
2330
2331/*
2332 * The legacy INTx interrupt handler. This needs to handle data events from
2333 * SGE response queues as well as error and other async events as they all use
2334 * the same interrupt pin. We use one SGE response queue per port in this mode
2335 * and protect all response queues with queue 0's lock.
2336 */
2337static irqreturn_t t3_intr(int irq, void *cookie)
2338{
2339 int work_done, w0, w1;
2340 struct adapter *adap = cookie;
2341 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2342 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2343
2344 spin_lock(&q0->lock);
2345
2346 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2347 w1 = adap->params.nports == 2 &&
2348 is_new_response(&q1->desc[q1->cidx], q1);
2349
2350 if (likely(w0 | w1)) {
2351 t3_write_reg(adap, A_PL_CLI, 0);
2352 t3_read_reg(adap, A_PL_CLI); /* flush */
2353
2354 if (likely(w0))
2355 process_responses_gts(adap, q0);
2356
2357 if (w1)
2358 process_responses_gts(adap, q1);
2359
2360 work_done = w0 | w1;
2361 } else
2362 work_done = t3_slow_intr_handler(adap);
2363
2364 spin_unlock(&q0->lock);
2365 return IRQ_RETVAL(work_done != 0);
2366}
2367
2368/*
2369 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2370 * Handles data events from SGE response queues as well as error and other
2371 * async events as they all use the same interrupt pin. We use one SGE
2372 * response queue per port in this mode and protect all response queues with
2373 * queue 0's lock.
2374 */
2375static irqreturn_t t3b_intr(int irq, void *cookie)
2376{
2377 u32 map;
2378 struct adapter *adap = cookie;
2379 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2380
2381 t3_write_reg(adap, A_PL_CLI, 0);
2382 map = t3_read_reg(adap, A_SG_DATA_INTR);
2383
2384 if (unlikely(!map)) /* shared interrupt, most likely */
2385 return IRQ_NONE;
2386
2387 spin_lock(&q0->lock);
2388
2389 if (unlikely(map & F_ERRINTR))
2390 t3_slow_intr_handler(adap);
2391
2392 if (likely(map & 1))
2393 process_responses_gts(adap, q0);
2394
2395 if (map & 2)
2396 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2397
2398 spin_unlock(&q0->lock);
2399 return IRQ_HANDLED;
2400}
2401
2402/*
2403 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2404 * Handles data events from SGE response queues as well as error and other
2405 * async events as they all use the same interrupt pin. We use one SGE
2406 * response queue per port in this mode and protect all response queues with
2407 * queue 0's lock.
2408 */
2409static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2410{
2411 u32 map;
2412 struct net_device *dev;
2413 struct adapter *adap = cookie;
2414 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2415
2416 t3_write_reg(adap, A_PL_CLI, 0);
2417 map = t3_read_reg(adap, A_SG_DATA_INTR);
2418
2419 if (unlikely(!map)) /* shared interrupt, most likely */
2420 return IRQ_NONE;
2421
2422 spin_lock(&q0->lock);
2423
2424 if (unlikely(map & F_ERRINTR))
2425 t3_slow_intr_handler(adap);
2426
2427 if (likely(map & 1)) {
2428 dev = adap->sge.qs[0].netdev;
2429
4d22de3e
DLR
2430 if (likely(__netif_rx_schedule_prep(dev)))
2431 __netif_rx_schedule(dev);
2432 }
2433 if (map & 2) {
2434 dev = adap->sge.qs[1].netdev;
2435
4d22de3e
DLR
2436 if (likely(__netif_rx_schedule_prep(dev)))
2437 __netif_rx_schedule(dev);
2438 }
2439
2440 spin_unlock(&q0->lock);
2441 return IRQ_HANDLED;
2442}
2443
2444/**
2445 * t3_intr_handler - select the top-level interrupt handler
2446 * @adap: the adapter
2447 * @polling: whether using NAPI to service response queues
2448 *
2449 * Selects the top-level interrupt handler based on the type of interrupts
2450 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2451 * response queues.
2452 */
2453intr_handler_t t3_intr_handler(struct adapter *adap, int polling)
2454{
2455 if (adap->flags & USING_MSIX)
2456 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2457 if (adap->flags & USING_MSI)
2458 return polling ? t3_intr_msi_napi : t3_intr_msi;
2459 if (adap->params.rev > 0)
2460 return polling ? t3b_intr_napi : t3b_intr;
2461 return t3_intr;
2462}
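/*
 * Editorial sketch, not part of the driver: roughly how the handler chosen
 * by t3_intr_handler() might be handed to request_irq() for the MSI or
 * legacy-INTx case, where the handlers above expect the adapter itself as
 * the cookie.  Assumes <linux/interrupt.h> is in scope; the IRQF_SHARED
 * choice for the legacy line and the "cxgb3-demo" name are placeholders
 * rather than values taken from cxgb3_main.c.
 */
static int demo_register_irq(struct adapter *adap, int polling)
{
	unsigned long flags = (adap->flags & USING_MSI) ? 0 : IRQF_SHARED;

	return request_irq(adap->pdev->irq, t3_intr_handler(adap, polling),
			   flags, "cxgb3-demo", adap);
}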
2463
2464/**
2465 * t3_sge_err_intr_handler - SGE async event interrupt handler
2466 * @adapter: the adapter
2467 *
2468 * Interrupt handler for SGE asynchronous (non-data) events.
2469 */
2470void t3_sge_err_intr_handler(struct adapter *adapter)
2471{
2472 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2473
2474 if (status & F_RSPQCREDITOVERFOW)
2475 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2476
2477 if (status & F_RSPQDISABLED) {
2478 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2479
2480 CH_ALERT(adapter,
2481 "packet delivered to disabled response queue "
2482 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2483 }
2484
2485 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2486 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
2487 t3_fatal_err(adapter);
2488}
2489
2490/**
2491 * sge_timer_cb - perform periodic maintenance of an SGE qset
2492 * @data: the SGE queue set to maintain
2493 *
2494 * Runs periodically from a timer to perform maintenance of an SGE queue
2495 * set. It performs two tasks:
2496 *
2497 * a) Cleans up any completed Tx descriptors that may still be pending.
2498 * Normal descriptor cleanup happens when new packets are added to a Tx
2499 * queue so this timer is relatively infrequent and does any cleanup only
2500 * if the Tx queue has not seen any new packets in a while. We make a
2501 * best effort attempt to reclaim descriptors, in that we don't wait
2502 * around if we cannot get a queue's lock (which most likely is because
2503 * someone else is queueing new packets and so will also handle the clean
2504 * up). Since control queues use immediate data exclusively we don't
2505 * bother cleaning them up here.
2506 *
2507 * b) Replenishes Rx queues that have run out due to memory shortage.
2508 * Normally new Rx buffers are added when existing ones are consumed but
2509 * when out of memory a queue can become empty. We try to add only a few
2510 * buffers here; the queue will be replenished fully as these new buffers
2511 * are used up if memory shortage has subsided.
2512 */
2513static void sge_timer_cb(unsigned long data)
2514{
2515 spinlock_t *lock;
2516 struct sge_qset *qs = (struct sge_qset *)data;
5fbf816f
DLR
2517 const struct port_info *pi = netdev_priv(qs->netdev);
2518 struct adapter *adap = pi->adapter;
4d22de3e
DLR
2519
2520 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2521 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2522 spin_unlock(&qs->txq[TXQ_ETH].lock);
2523 }
2524 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2525 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2526 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2527 }
2528 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
e0994eb1 2529 &adap->sge.qs[0].rspq.lock;
4d22de3e
DLR
2530 if (spin_trylock_irq(lock)) {
2531 if (!napi_is_scheduled(qs->netdev)) {
bae73f44
DLR
2532 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2533
4d22de3e
DLR
2534 if (qs->fl[0].credits < qs->fl[0].size)
2535 __refill_fl(adap, &qs->fl[0]);
2536 if (qs->fl[1].credits < qs->fl[1].size)
2537 __refill_fl(adap, &qs->fl[1]);
bae73f44
DLR
2538
2539 if (status & (1 << qs->rspq.cntxt_id)) {
2540 qs->rspq.starved++;
2541 if (qs->rspq.credits) {
2542 refill_rspq(adap, &qs->rspq, 1);
2543 qs->rspq.credits--;
2544 qs->rspq.restarted++;
e0994eb1 2545 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
bae73f44
DLR
2546 1 << qs->rspq.cntxt_id);
2547 }
2548 }
4d22de3e
DLR
2549 }
2550 spin_unlock_irq(lock);
2551 }
2552 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2553}
2554
2555/**
2556 * t3_update_qset_coalesce - update coalescing settings for a queue set
2557 * @qs: the SGE queue set
2558 * @p: new queue set parameters
2559 *
2560 * Update the coalescing settings for an SGE queue set. Nothing is done
2561 * if the queue set is not initialized yet.
2562 */
2563void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2564{
2565 if (!qs->netdev)
2566 return;
2567
2568 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); /* can't be 0 */
2569 qs->rspq.polling = p->polling;
2570 qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
2571}
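/*
 * Editorial worked example for the conversion above: the holdoff timer is
 * programmed in 0.1 us units, hence the factor of 10.  With the default
 * coalesce_usecs of 5 set up in t3_sge_prep() below, holdoff_tmr becomes
 * 50, i.e. 5 us; a requested value of 0 is clamped to 1 (0.1 us) because
 * the hardware field cannot be zero.
 */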
2572
2573/**
2574 * t3_sge_alloc_qset - initialize an SGE queue set
2575 * @adapter: the adapter
2576 * @id: the queue set id
2577 * @nports: how many Ethernet ports will be using this queue set
2578 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2579 * @p: configuration parameters for this queue set
2580 * @ntxq: number of Tx queues for the queue set
2581 * @netdev: net device associated with this queue set
2582 *
2583 * Allocate resources and initialize an SGE queue set. A queue set
2584 * comprises a response queue, two Rx free-buffer queues, and up to 3
2585 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2586 * queue, offload queue, and control queue.
2587 */
2588int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2589 int irq_vec_idx, const struct qset_params *p,
2590 int ntxq, struct net_device *netdev)
2591{
2592 int i, ret = -ENOMEM;
2593 struct sge_qset *q = &adapter->sge.qs[id];
2594
2595 init_qset_cntxt(q, id);
2596 init_timer(&q->tx_reclaim_timer);
2597 q->tx_reclaim_timer.data = (unsigned long)q;
2598 q->tx_reclaim_timer.function = sge_timer_cb;
2599
2600 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2601 sizeof(struct rx_desc),
2602 sizeof(struct rx_sw_desc),
2603 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2604 if (!q->fl[0].desc)
2605 goto err;
2606
2607 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2608 sizeof(struct rx_desc),
2609 sizeof(struct rx_sw_desc),
2610 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2611 if (!q->fl[1].desc)
2612 goto err;
2613
2614 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2615 sizeof(struct rsp_desc), 0,
2616 &q->rspq.phys_addr, NULL);
2617 if (!q->rspq.desc)
2618 goto err;
2619
2620 for (i = 0; i < ntxq; ++i) {
2621 /*
2622 * The control queue always uses immediate data so does not
2623 * need to keep track of any sk_buffs.
2624 */
2625 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2626
2627 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2628 sizeof(struct tx_desc), sz,
2629 &q->txq[i].phys_addr,
2630 &q->txq[i].sdesc);
2631 if (!q->txq[i].desc)
2632 goto err;
2633
2634 q->txq[i].gen = 1;
2635 q->txq[i].size = p->txq_size[i];
2636 spin_lock_init(&q->txq[i].lock);
2637 skb_queue_head_init(&q->txq[i].sendq);
2638 }
2639
2640 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2641 (unsigned long)q);
2642 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2643 (unsigned long)q);
2644
2645 q->fl[0].gen = q->fl[1].gen = 1;
2646 q->fl[0].size = p->fl_size;
2647 q->fl[1].size = p->jumbo_size;
2648
2649 q->rspq.gen = 1;
2650 q->rspq.size = p->rspq_size;
2651 spin_lock_init(&q->rspq.lock);
2652
2653 q->txq[TXQ_ETH].stop_thres = nports *
2654 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2655
cf992af5
DLR
2656#if FL0_PG_CHUNK_SIZE > 0
2657 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
e0994eb1 2658#else
cf992af5 2659 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
e0994eb1 2660#endif
cf992af5
DLR
2661 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2662 q->fl[1].buf_size = is_offload(adapter) ?
2663 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2664 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
4d22de3e
DLR
2665
2666 spin_lock(&adapter->sge.reg_lock);
2667
2668 /* FL threshold comparison uses < */
2669 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2670 q->rspq.phys_addr, q->rspq.size,
2671 q->fl[0].buf_size, 1, 0);
2672 if (ret)
2673 goto err_unlock;
2674
2675 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2676 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2677 q->fl[i].phys_addr, q->fl[i].size,
2678 q->fl[i].buf_size, p->cong_thres, 1,
2679 0);
2680 if (ret)
2681 goto err_unlock;
2682 }
2683
2684 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2685 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2686 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2687 1, 0);
2688 if (ret)
2689 goto err_unlock;
2690
2691 if (ntxq > 1) {
2692 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2693 USE_GTS, SGE_CNTXT_OFLD, id,
2694 q->txq[TXQ_OFLD].phys_addr,
2695 q->txq[TXQ_OFLD].size, 0, 1, 0);
2696 if (ret)
2697 goto err_unlock;
2698 }
2699
2700 if (ntxq > 2) {
2701 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2702 SGE_CNTXT_CTRL, id,
2703 q->txq[TXQ_CTRL].phys_addr,
2704 q->txq[TXQ_CTRL].size,
2705 q->txq[TXQ_CTRL].token, 1, 0);
2706 if (ret)
2707 goto err_unlock;
2708 }
2709
2710 spin_unlock(&adapter->sge.reg_lock);
2711 q->netdev = netdev;
2712 t3_update_qset_coalesce(q, p);
2713
2714 /*
2715 * We use atalk_ptr as a backpointer to a qset. In case a device is
2716 * associated with multiple queue sets only the first one sets
2717 * atalk_ptr.
2718 */
2719 if (netdev->atalk_ptr == NULL)
2720 netdev->atalk_ptr = q;
2721
2722 refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2723 refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2724 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2725
2726 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2727 V_NEWTIMER(q->rspq.holdoff_tmr));
2728
2729 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2730 return 0;
2731
2732 err_unlock:
2733 spin_unlock(&adapter->sge.reg_lock);
2734 err:
2735 t3_free_qset(adapter, q);
2736 return ret;
2737}
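/*
 * Editorial sketch, not part of the driver: roughly how a caller could use
 * t3_sge_alloc_qset() to bring up one queue set per port.  The loop, the
 * MSI-X vector numbering, the parameter choices (one port per set, ntxq of
 * SGE_TXQ_PER_SET) and the adap->params.sge.qset[] access are assumptions
 * for illustration; the real setup lives in cxgb3_main.c.
 */
static int demo_setup_qsets(struct adapter *adap)
{
	int i, err;

	for (i = 0; i < adap->params.nports; i++) {
		err = t3_sge_alloc_qset(adap, i, 1,
					(adap->flags & USING_MSIX) ? i + 1 : i,
					&adap->params.sge.qset[i],
					SGE_TXQ_PER_SET, adap->port[i]);
		if (err)
			return err;	/* a real caller would unwind here */
	}
	return 0;
}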
2738
2739/**
2740 * t3_free_sge_resources - free SGE resources
2741 * @adap: the adapter
2742 *
2743 * Frees resources used by the SGE queue sets.
2744 */
2745void t3_free_sge_resources(struct adapter *adap)
2746{
2747 int i;
2748
2749 for (i = 0; i < SGE_QSETS; ++i)
2750 t3_free_qset(adap, &adap->sge.qs[i]);
2751}
2752
2753/**
2754 * t3_sge_start - enable SGE
2755 * @adap: the adapter
2756 *
2757 * Enables the SGE for DMAs. This is the last step in starting packet
2758 * transfers.
2759 */
2760void t3_sge_start(struct adapter *adap)
2761{
2762 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2763}
2764
2765/**
2766 * t3_sge_stop - disable SGE operation
2767 * @adap: the adapter
2768 *
2769 * Disables the DMA engine. This can be called in emergencies (e.g.,
2770 * from error interrupts) or from normal process context. In the latter
2771 * case it also disables any pending queue restart tasklets. Note that
2772 * if it is called in interrupt context it cannot disable the restart
2773 * tasklets as it cannot wait, however the tasklets will have no effect
2774 * since the doorbells are disabled and the driver will call this again
2775 * later from process context, at which time the tasklets will be stopped
2776 * if they are still running.
2777 */
2778void t3_sge_stop(struct adapter *adap)
2779{
2780 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2781 if (!in_interrupt()) {
2782 int i;
2783
2784 for (i = 0; i < SGE_QSETS; ++i) {
2785 struct sge_qset *qs = &adap->sge.qs[i];
2786
2787 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2788 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2789 }
2790 }
2791}
2792
2793/**
2794 * t3_sge_init - initialize SGE
2795 * @adap: the adapter
2796 * @p: the SGE parameters
2797 *
2798 * Performs SGE initialization needed every time after a chip reset.
2799 * We do not initialize any of the queue sets here; instead, the driver
2800 * top-level must request those individually. We also do not enable DMA
2801 * here; that should be done after the queues have been set up.
2802 */
2803void t3_sge_init(struct adapter *adap, struct sge_params *p)
2804{
2805 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2806
2807 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
2808 F_CQCRDTCTRL |
2809 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2810 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
2811#if SGE_NUM_GENBITS == 1
2812 ctrl |= F_EGRGENCTRL;
2813#endif
2814 if (adap->params.rev > 0) {
2815 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2816 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
2817 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
2818 }
2819 t3_write_reg(adap, A_SG_CONTROL, ctrl);
2820 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2821 V_LORCQDRBTHRSH(512));
2822 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2823 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
6195c71d 2824 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
4d22de3e
DLR
2825 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
2826 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2827 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2828 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2829 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2830 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2831}
2832
2833/**
2834 * t3_sge_prep - one-time SGE initialization
2835 * @adap: the associated adapter
2836 * @p: SGE parameters
2837 *
2838 * Performs one-time initialization of SGE SW state. Includes determining
2839 * defaults for the assorted SGE parameters, which admins can change until
2840 * they are used to initialize the SGE.
2841 */
2842void __devinit t3_sge_prep(struct adapter *adap, struct sge_params *p)
2843{
2844 int i;
2845
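	/*
	 * Editorial note: the 16KB ceiling below appears to mirror the
	 * offload jumbo free-list buffer sizing in t3_sge_alloc_qset(), so
	 * that the largest packet, its cpl_rx_data header and the trailing
	 * skb_shared_info together fit in a single 16KB buffer.
	 */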
2846 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2847 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2848
2849 for (i = 0; i < SGE_QSETS; ++i) {
2850 struct qset_params *q = p->qset + i;
2851
2852 q->polling = adap->params.rev > 0;
2853 q->coalesce_usecs = 5;
2854 q->rspq_size = 1024;
e0994eb1 2855 q->fl_size = 1024;
4d22de3e
DLR
2856 q->jumbo_size = 512;
2857 q->txq_size[TXQ_ETH] = 1024;
2858 q->txq_size[TXQ_OFLD] = 1024;
2859 q->txq_size[TXQ_CTRL] = 256;
2860 q->cong_thres = 0;
2861 }
2862
2863 spin_lock_init(&adap->sge.reg_lock);
2864}
2865
2866/**
2867 * t3_get_desc - dump an SGE descriptor for debugging purposes
2868 * @qs: the queue set
2869 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2870 * @idx: the descriptor index in the queue
2871 * @data: where to dump the descriptor contents
2872 *
2873 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2874 * size of the descriptor.
2875 */
2876int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2877 unsigned char *data)
2878{
2879 if (qnum >= 6)
2880 return -EINVAL;
2881
2882 if (qnum < 3) {
2883 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2884 return -EINVAL;
2885 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2886 return sizeof(struct tx_desc);
2887 }
2888
2889 if (qnum == 3) {
2890 if (!qs->rspq.desc || idx >= qs->rspq.size)
2891 return -EINVAL;
2892 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2893 return sizeof(struct rsp_desc);
2894 }
2895
2896 qnum -= 4;
2897 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2898 return -EINVAL;
2899 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2900 return sizeof(struct rx_desc);
2901}
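/*
 * Editorial sketch, not part of the driver: a minimal debug helper built on
 * t3_get_desc().  The union sizes an on-stack buffer for whichever
 * descriptor type @qnum selects; the helper name and the out/out_len
 * parameters are made up for this illustration.
 */
static int demo_peek_desc(const struct sge_qset *qs, unsigned int qnum,
			  unsigned int idx, unsigned char *out, int out_len)
{
	union {
		struct tx_desc txd;
		struct rx_desc rxd;
		struct rsp_desc rspd;
	} d;
	int len = t3_get_desc(qs, qnum, idx, (unsigned char *)&d);

	if (len < 0)
		return len;
	if (len > out_len)
		return -EINVAL;
	memcpy(out, &d, len);
	return len;
}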