diff options
Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/sge.c')
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/sge.c | 408 |
1 files changed, 281 insertions, 127 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index d22d728d4e5c..ebf935a1e352 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -203,6 +203,9 @@ enum { RX_LARGE_MTU_BUF = 0x3, /* large MTU buffer */ }; +static int timer_pkt_quota[] = {1, 1, 2, 3, 4, 5}; +#define MIN_NAPI_WORK 1 + static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d) { return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS; @@ -521,9 +524,25 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) val = PIDX(q->pend_cred / 8); if (!is_t4(adap->params.chip)) val |= DBTYPE(1); + val |= DBPRIO(1); wmb(); - t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) | - QID(q->cntxt_id) | val); + + /* If we don't have access to the new User Doorbell (T5+), use + * the old doorbell mechanism; otherwise use the new BAR2 + * mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), + val | QID(q->cntxt_id)); + } else { + writel(val | QID(q->bar2_qid), + q->bar2_addr + SGE_UDB_KDOORBELL); + + /* This Write memory Barrier will force the write to + * the User Doorbell area to be flushed. + */ + wmb(); + } q->pend_cred &= 7; } } @@ -559,7 +578,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, __be64 *d = &q->desc[q->pidx]; struct rx_sw_desc *sd = &q->sdesc[q->pidx]; - gfp |= __GFP_NOWARN | __GFP_COLD; + gfp |= __GFP_NOWARN; if (s->fl_pg_order == 0) goto alloc_small_pages; @@ -568,7 +587,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, * Prefer large buffers */ while (n) { - pg = alloc_pages(gfp | __GFP_COMP, s->fl_pg_order); + pg = __dev_alloc_pages(gfp, s->fl_pg_order); if (unlikely(!pg)) { q->large_alloc_failed++; break; /* fall back to single pages */ @@ -598,7 +617,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, alloc_small_pages: while (n--) { - pg = __skb_alloc_page(gfp, NULL); + pg = __dev_alloc_page(gfp); if (unlikely(!pg)) { q->alloc_failed++; break; @@ -799,7 +818,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, sgl->addr0 = cpu_to_be64(addr[1]); } - sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) | ULPTX_NSGE(nfrags)); + sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) | ULPTX_NSGE(nfrags)); if (likely(--nfrags == 0)) return; /* @@ -834,8 +853,8 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, } /* This function copies 64 byte coalesced work request to - * memory mapped BAR2 space(user space writes). - * For coalesced WR SGE, fetches data from the FIFO instead of from Host. + * memory mapped BAR2 space. For coalesced WR SGE fetches + * data from the FIFO instead of from Host. */ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) { @@ -859,30 +878,67 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) */ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { - unsigned int *wr, index; - unsigned long flags; - wmb(); /* write descriptors before telling HW */ - spin_lock_irqsave(&q->db_lock, flags); - if (!q->db_disabled) { - if (is_t4(adap->params.chip)) { + + /* If we don't have access to the new User Doorbell (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { + u32 val = PIDX(n); + unsigned long flags; + + /* For T4 we need to participate in the Doorbell Recovery + * mechanism. + */ + spin_lock_irqsave(&q->db_lock, flags); + if (!q->db_disabled) t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), - QID(q->cntxt_id) | PIDX(n)); + QID(q->cntxt_id) | val); + else + q->db_pidx_inc += n; + q->db_pidx = q->pidx; + spin_unlock_irqrestore(&q->db_lock, flags); + } else { + u32 val = PIDX_T5(n); + + /* T4 and later chips share the same PIDX field offset within + * the doorbell, but T5 and later shrank the field in order to + * gain a bit for Doorbell Priority. The field was absurdly + * large in the first place (14 bits) so we just use the T5 + * and later limits and warn if a Queue ID is too large. + */ + WARN_ON(val & DBPRIO(1)); + + /* If we're only writing a single TX Descriptor and we can use + * Inferred QID registers, we can use the Write Combining + * Gather Buffer; otherwise we use the simple doorbell. + */ + if (n == 1 && q->bar2_qid == 0) { + int index = (q->pidx + ? (q->pidx - 1) + : (q->size - 1)); + u64 *wr = (u64 *)&q->desc[index]; + + cxgb_pio_copy((u64 __iomem *) + (q->bar2_addr + SGE_UDB_WCDOORBELL), + wr); } else { - if (n == 1) { - index = q->pidx ? (q->pidx - 1) : (q->size - 1); - wr = (unsigned int *)&q->desc[index]; - cxgb_pio_copy((u64 __iomem *) - (adap->bar2 + q->udb + 64), - (u64 *)wr); - } else - writel(n, adap->bar2 + q->udb + 8); - wmb(); + writel(val | QID(q->bar2_qid), + q->bar2_addr + SGE_UDB_KDOORBELL); } - } else - q->db_pidx_inc += n; - q->db_pidx = q->pidx; - spin_unlock_irqrestore(&q->db_lock, flags); + + /* This Write Memory Barrier will force the write to the User + * Doorbell area to be flushed. This is needed to prevent + * writes on different CPUs for the same queue from hitting + * the adapter out of order. This is required when some Work + * Requests take the Write Combine Gather Buffer path (user + * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some + * take the traditional path where we simply increment the + * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the + * hardware DMA read the actual Work Request. + */ + wmb(); + } } /** @@ -1041,10 +1097,10 @@ out_free: dev_kfree_skb_any(skb); goto out_free; } - wr_mid = FW_WR_LEN16(DIV_ROUND_UP(flits, 2)); + wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)); if (unlikely(credits < ETHTXQ_STOP_THRES)) { eth_txq_stop(q); - wr_mid |= FW_WR_EQUEQ | FW_WR_EQUIQ; + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } wr = (void *)&q->q.desc[q->q.pidx]; @@ -1061,8 +1117,8 @@ out_free: dev_kfree_skb_any(skb); int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN; len += sizeof(*lso); - wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) | - FW_WR_IMMDLEN(len)); + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(len)); lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) | LSO_FIRST_SLICE | LSO_LAST_SLICE | LSO_IPV6(v6) | @@ -1072,7 +1128,10 @@ out_free: dev_kfree_skb_any(skb); lso->c.ipid_ofst = htons(0); lso->c.mss = htons(ssi->gso_size); lso->c.seqno_offset = htonl(0); - lso->c.len = htonl(skb->len); + if (is_t4(adap->params.chip)) + lso->c.len = htonl(skb->len); + else + lso->c.len = htonl(LSO_T5_XFER_SIZE(skb->len)); cpl = (void *)(lso + 1); cntrl = TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | TXPKT_IPHDR_LEN(l3hdr_len) | @@ -1081,8 +1140,8 @@ out_free: dev_kfree_skb_any(skb); q->tx_cso += ssi->gso_segs; } else { len += sizeof(*cpl); - wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) | - FW_WR_IMMDLEN(len)); + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(len)); cpl = (void *)(wr + 1); if (skb->ip_summed == CHECKSUM_PARTIAL) { cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS; @@ -1170,7 +1229,7 @@ static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr) { reclaim_completed_tx_imm(&q->q); if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) { - wr->lo |= htonl(FW_WR_EQUEQ | FW_WR_EQUIQ); + wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F); q->q.stops++; q->full = 1; } @@ -1352,7 +1411,7 @@ static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb) { struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data; - wr->lo |= htonl(FW_WR_EQUEQ | FW_WR_EQUIQ); + wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F); q->q.stops++; q->full = 1; } @@ -1916,16 +1975,45 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) unsigned int params; struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); int work_done = process_responses(q, budget); + u32 val; if (likely(work_done < budget)) { + int timer_index; + napi_complete(napi); - params = q->next_intr_params; - q->next_intr_params = q->intr_params; + timer_index = QINTR_TIMER_IDX_GET(q->next_intr_params); + + if (q->adaptive_rx) { + if (work_done > max(timer_pkt_quota[timer_index], + MIN_NAPI_WORK)) + timer_index = (timer_index + 1); + else + timer_index = timer_index - 1; + + timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1); + q->next_intr_params = QINTR_TIMER_IDX(timer_index) | + V_QINTR_CNT_EN; + params = q->next_intr_params; + } else { + params = q->next_intr_params; + q->next_intr_params = q->intr_params; + } } else params = QINTR_TIMER_IDX(7); - t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) | - INGRESSQID((u32)q->cntxt_id) | SEINTARM(params)); + val = CIDXINC(work_done) | SEINTARM(params); + + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { + t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), + val | INGRESSQID((u32)q->cntxt_id)); + } else { + writel(val | INGRESSQID(q->bar2_qid), + q->bar2_addr + SGE_UDB_GTS); + wmb(); + } return work_done; } @@ -1949,6 +2037,7 @@ static unsigned int process_intrq(struct adapter *adap) unsigned int credits; const struct rsp_ctrl *rc; struct sge_rspq *q = &adap->sge.intrq; + u32 val; spin_lock(&adap->sge.intrq_lock); for (credits = 0; ; credits++) { @@ -1967,8 +2056,19 @@ static unsigned int process_intrq(struct adapter *adap) rspq_next(q); } - t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) | - INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params)); + val = CIDXINC(credits) | SEINTARM(q->intr_params); + + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { + t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), + val | INGRESSQID(q->cntxt_id)); + } else { + writel(val | INGRESSQID(q->bar2_qid), + q->bar2_addr + SGE_UDB_GTS); + wmb(); + } spin_unlock(&adap->sge.intrq_lock); return credits; } @@ -2149,6 +2249,35 @@ static void sge_tx_timer_cb(unsigned long data) mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); } +/** + * bar2_address - return the BAR2 address for an SGE Queue's Registers + * @adapter: the adapter + * @qid: the SGE Queue ID + * @qtype: the SGE Queue Type (Egress or Ingress) + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues + * + * Returns the BAR2 address for the SGE Queue Registers associated with + * @qid. If BAR2 SGE Registers aren't available, returns NULL. Also + * returns the BAR2 Queue ID to be used with writes to the BAR2 SGE + * Queue Registers. If the BAR2 Queue ID is 0, then "Inferred Queue ID" + * Registers are supported (e.g. the Write Combining Doorbell Buffer). + */ +static void __iomem *bar2_address(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + unsigned int *pbar2_qid) +{ + u64 bar2_qoffset; + int ret; + + ret = cxgb4_t4_bar2_sge_qregs(adapter, qid, qtype, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + return adapter->bar2 + bar2_qoffset; +} + int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, struct sge_fl *fl, rspq_handler_t hnd) @@ -2167,20 +2296,20 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, return -ENOMEM; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP(FW_IQ_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_IQ_CMD_PFN(adap->fn) | FW_IQ_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC | FW_IQ_CMD_IQSTART(1) | + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_IQ_CMD_PFN_V(adap->fn) | FW_IQ_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC_F | FW_IQ_CMD_IQSTART_F | FW_LEN16(c)); - c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | - FW_IQ_CMD_IQASYNCH(fwevtq) | FW_IQ_CMD_VIID(pi->viid) | - FW_IQ_CMD_IQANDST(intr_idx < 0) | FW_IQ_CMD_IQANUD(1) | - FW_IQ_CMD_IQANDSTINDEX(intr_idx >= 0 ? intr_idx : + c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(FW_IQ_TYPE_FL_INT_CAP) | + FW_IQ_CMD_IQASYNCH_V(fwevtq) | FW_IQ_CMD_VIID_V(pi->viid) | + FW_IQ_CMD_IQANDST_V(intr_idx < 0) | FW_IQ_CMD_IQANUD_V(1) | + FW_IQ_CMD_IQANDSTINDEX_V(intr_idx >= 0 ? intr_idx : -intr_idx - 1)); - c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH(pi->tx_chan) | - FW_IQ_CMD_IQGTSMODE | - FW_IQ_CMD_IQINTCNTTHRESH(iq->pktcnt_idx) | - FW_IQ_CMD_IQESIZE(ilog2(iq->iqe_len) - 4)); + c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH_V(pi->tx_chan) | + FW_IQ_CMD_IQGTSMODE_F | + FW_IQ_CMD_IQINTCNTTHRESH_V(iq->pktcnt_idx) | + FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4)); c.iqsize = htons(iq->size); c.iqaddr = cpu_to_be64(iq->phys_addr); @@ -2193,12 +2322,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto fl_nomem; flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); - c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN(1) | - FW_IQ_CMD_FL0FETCHRO(1) | - FW_IQ_CMD_FL0DATARO(1) | - FW_IQ_CMD_FL0PADEN(1)); - c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN(2) | - FW_IQ_CMD_FL0FBMAX(3)); + c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_F | + FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0PADEN_F); + c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) | + FW_IQ_CMD_FL0FBMAX_V(3)); c.fl0size = htons(flsz); c.fl0addr = cpu_to_be64(fl->addr); } @@ -2214,6 +2343,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, iq->next_intr_params = iq->intr_params; iq->cntxt_id = ntohs(c.iqid); iq->abs_id = ntohs(c.physiqid); + iq->bar2_addr = bar2_address(adap, + iq->cntxt_id, + T4_BAR2_QTYPE_INGRESS, + &iq->bar2_qid); iq->size--; /* subtract status entry */ iq->netdev = dev; iq->handler = hnd; @@ -2229,6 +2362,14 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, fl->pidx = fl->cidx = 0; fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0; adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl; + + /* Note, we must initialize the BAR2 Free List User Doorbell + * information before refilling the Free List! + */ + fl->bar2_addr = bar2_address(adap, + fl->cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &fl->bar2_qid); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } return 0; @@ -2254,22 +2395,10 @@ err: static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) { q->cntxt_id = id; - if (!is_t4(adap->params.chip)) { - unsigned int s_qpp; - unsigned short udb_density; - unsigned long qpshift; - int page; - - s_qpp = QUEUESPERPAGEPF1 * adap->fn; - udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap, - SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp)); - qpshift = PAGE_SHIFT - ilog2(udb_density); - q->udb = q->cntxt_id << qpshift; - q->udb &= PAGE_MASK; - page = q->udb / PAGE_SIZE; - q->udb += (q->cntxt_id - (page * udb_density)) * 128; - } - + q->bar2_addr = bar2_address(adap, + q->cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &q->bar2_qid); q->in_use = 0; q->cidx = q->pidx = 0; q->stops = q->restarts = 0; @@ -2298,21 +2427,22 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, return -ENOMEM; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_ETH_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_EQ_ETH_CMD_PFN(adap->fn) | FW_EQ_ETH_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC | - FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); - c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE | - FW_EQ_ETH_CMD_VIID(pi->viid)); - c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE(2) | - FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | - FW_EQ_ETH_CMD_FETCHRO(1) | - FW_EQ_ETH_CMD_IQID(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_ETH_CMD_FBMIN(2) | - FW_EQ_ETH_CMD_FBMAX(3) | - FW_EQ_ETH_CMD_CIDXFTHRESH(5) | - FW_EQ_ETH_CMD_EQSIZE(nentries)); + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_ETH_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_EQ_ETH_CMD_PFN_V(adap->fn) | + FW_EQ_ETH_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F | + FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c)); + c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | + FW_EQ_ETH_CMD_VIID_V(pi->viid)); + c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(2) | + FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_ETH_CMD_FETCHRO_V(1) | + FW_EQ_ETH_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = htonl(FW_EQ_ETH_CMD_FBMIN_V(2) | + FW_EQ_ETH_CMD_FBMAX_V(3) | + FW_EQ_ETH_CMD_CIDXFTHRESH_V(5) | + FW_EQ_ETH_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -2326,7 +2456,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, return ret; } - init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_GET(ntohl(c.eqid_pkd))); + init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd))); txq->txq = netdevq; txq->tso = txq->tx_cso = txq->vlan_ins = 0; txq->mapping_err = 0; @@ -2351,22 +2481,22 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, if (!txq->q.desc) return -ENOMEM; - c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_EQ_CTRL_CMD_PFN(adap->fn) | - FW_EQ_CTRL_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC | - FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); - c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID(cmplqid)); + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_EQ_CTRL_CMD_PFN_V(adap->fn) | + FW_EQ_CTRL_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC_F | + FW_EQ_CTRL_CMD_EQSTART_F | FW_LEN16(c)); + c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID_V(cmplqid)); c.physeqid_pkd = htonl(0); - c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE(2) | - FW_EQ_CTRL_CMD_PCIECHN(pi->tx_chan) | - FW_EQ_CTRL_CMD_FETCHRO | - FW_EQ_CTRL_CMD_IQID(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN(2) | - FW_EQ_CTRL_CMD_FBMAX(3) | - FW_EQ_CTRL_CMD_CIDXFTHRESH(5) | - FW_EQ_CTRL_CMD_EQSIZE(nentries)); + c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(2) | + FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_CTRL_CMD_FETCHRO_F | + FW_EQ_CTRL_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN_V(2) | + FW_EQ_CTRL_CMD_FBMAX_V(3) | + FW_EQ_CTRL_CMD_CIDXFTHRESH_V(5) | + FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -2378,7 +2508,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, return ret; } - init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_GET(ntohl(c.cmpliqid_eqid))); + init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid))); txq->adap = adap; skb_queue_head_init(&txq->sendq); tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq); @@ -2405,20 +2535,20 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, return -ENOMEM; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_EQ_OFLD_CMD_PFN(adap->fn) | - FW_EQ_OFLD_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC | - FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); - c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE(2) | - FW_EQ_OFLD_CMD_PCIECHN(pi->tx_chan) | - FW_EQ_OFLD_CMD_FETCHRO(1) | - FW_EQ_OFLD_CMD_IQID(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_OFLD_CMD_FBMIN(2) | - FW_EQ_OFLD_CMD_FBMAX(3) | - FW_EQ_OFLD_CMD_CIDXFTHRESH(5) | - FW_EQ_OFLD_CMD_EQSIZE(nentries)); + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_EQ_OFLD_CMD_PFN_V(adap->fn) | + FW_EQ_OFLD_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC_F | + FW_EQ_OFLD_CMD_EQSTART_F | FW_LEN16(c)); + c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(2) | + FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_OFLD_CMD_FETCHRO_F | + FW_EQ_OFLD_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = htonl(FW_EQ_OFLD_CMD_FBMIN_V(2) | + FW_EQ_OFLD_CMD_FBMAX_V(3) | + FW_EQ_OFLD_CMD_CIDXFTHRESH_V(5) | + FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -2432,7 +2562,7 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, return ret; } - init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_GET(ntohl(c.eqid_pkd))); + init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd))); txq->adap = adap; skb_queue_head_init(&txq->sendq); tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq); @@ -2789,7 +2919,8 @@ static int t4_sge_init_hard(struct adapter *adap) int t4_sge_init(struct adapter *adap) { struct sge *s = &adap->sge; - u32 sge_control, sge_conm_ctrl; + u32 sge_control, sge_control2, sge_conm_ctrl; + unsigned int ingpadboundary, ingpackboundary; int ret, egress_threshold; /* @@ -2799,8 +2930,31 @@ int t4_sge_init(struct adapter *adap) sge_control = t4_read_reg(adap, SGE_CONTROL); s->pktshift = PKTSHIFT_GET(sge_control); s->stat_len = (sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64; - s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_control) + - X_INGPADBOUNDARY_SHIFT); + + /* T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. T5 introduced the ability to specify these + * separately. The actual Ingress Packet Data alignment boundary + * within Packed Buffer Mode is the maximum of these two + * specifications. + */ + ingpadboundary = 1 << (INGPADBOUNDARY_GET(sge_control) + + X_INGPADBOUNDARY_SHIFT); + if (is_t4(adap->params.chip)) { + s->fl_align = ingpadboundary; + } else { + /* T5 has a different interpretation of one of the PCIe Packing + * Boundary values. + */ + sge_control2 = t4_read_reg(adap, SGE_CONTROL2_A); + ingpackboundary = INGPACKBOUNDARY_G(sge_control2); + if (ingpackboundary == INGPACKBOUNDARY_16B_X) + ingpackboundary = 16; + else + ingpackboundary = 1 << (ingpackboundary + + INGPACKBOUNDARY_SHIFT_X); + + s->fl_align = max(ingpadboundary, ingpackboundary); + } if (adap->flags & USING_SOFT_PARAMS) ret = t4_sge_init_soft(adap); |