Diffstat (limited to 'drivers/net/ethernet/mellanox/mlxsw/pci.c')
 -rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/pci.c  |  925
 1 file changed, 644 insertions, 281 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index af99bf17eb36..5b44c931b660 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -8,12 +8,12 @@
 #include <linux/device.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
-#include <linux/wait.h>
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/log2.h>
 #include <linux/string.h>
+#include <net/page_pool/helpers.h>
 
 #include "pci_hw.h"
 #include "pci.h"
@@ -21,6 +21,7 @@
 #include "cmd.h"
 #include "port.h"
 #include "resources.h"
+#include "txheader.h"
 
 #define mlxsw_pci_write32(mlxsw_pci, reg, val) \
 	iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
@@ -36,6 +37,11 @@ enum mlxsw_pci_queue_type {
 
 #define MLXSW_PCI_QUEUE_TYPE_COUNT	4
 
+enum mlxsw_pci_cq_type {
+	MLXSW_PCI_CQ_SDQ,
+	MLXSW_PCI_CQ_RDQ,
+};
+
 static const u16 mlxsw_pci_doorbell_type_offset[] = {
 	MLXSW_PCI_DOORBELL_SDQ_OFFSET,	/* for type MLXSW_PCI_QUEUE_TYPE_SDQ */
 	MLXSW_PCI_DOORBELL_RDQ_OFFSET,	/* for type MLXSW_PCI_QUEUE_TYPE_RDQ */
@@ -57,15 +63,11 @@ struct mlxsw_pci_mem_item {
 };
 
 struct mlxsw_pci_queue_elem_info {
+	struct page *pages[MLXSW_PCI_WQE_SG_ENTRIES];
 	char *elem; /* pointer to actual dma mapped element mem chunk */
-	union {
-		struct {
-			struct sk_buff *skb;
-		} sdq;
-		struct {
-			struct sk_buff *skb;
-		} rdq;
-	} u;
+	struct {
+		struct sk_buff *skb;
+	} sdq;
 };
 
 struct mlxsw_pci_queue {
@@ -78,19 +80,20 @@ struct mlxsw_pci_queue {
 	u8 num; /* queue number */
 	u8 elem_size; /* size of one element */
 	enum mlxsw_pci_queue_type type;
-	struct tasklet_struct tasklet; /* queue processing tasklet */
 	struct mlxsw_pci *pci;
 	union {
 		struct {
-			u32 comp_sdq_count;
-			u32 comp_rdq_count;
 			enum mlxsw_pci_cqe_v v;
+			struct mlxsw_pci_queue *dq;
+			struct napi_struct napi;
+			struct page_pool *page_pool;
 		} cq;
 		struct {
-			u32 ev_cmd_count;
-			u32 ev_comp_count;
-			u32 ev_other_count;
+			struct tasklet_struct tasklet;
 		} eq;
+		struct {
+			struct mlxsw_pci_queue *cq;
+		} rdq;
 	} u;
 };
 
@@ -109,6 +112,7 @@ struct mlxsw_pci {
 	bool cff_support;
 	enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode;
 	enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode;
+	u8 num_sg_entries; /* Number of scatter/gather entries for packets. */
 	struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
 	u32 doorbell_offset;
 	struct mlxsw_core *core;
@@ -120,9 +124,6 @@ struct mlxsw_pci {
 	struct mlxsw_pci_mem_item out_mbox;
 	struct mlxsw_pci_mem_item in_mbox;
 	struct mutex lock; /* Lock access to command registers */
-	bool nopoll;
-	wait_queue_head_t wait;
-	bool wait_done;
 	struct {
 		u8 status;
 		u64 out_param;
@@ -131,13 +132,43 @@ struct mlxsw_pci {
 	struct mlxsw_bus_info bus_info;
 	const struct pci_device_id *id;
 	enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
-	u8 num_sdq_cqs; /* Number of CQs used for SDQs */
+	u8 num_cqs; /* Number of CQs */
+	u8 num_sdqs; /* Number of SDQs */
 	bool skip_reset;
+	struct net_device *napi_dev_tx;
+	struct net_device *napi_dev_rx;
 };
 
-static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
+static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci)
 {
-	tasklet_schedule(&q->tasklet);
+	int err;
+
+	mlxsw_pci->napi_dev_tx = alloc_netdev_dummy(0);
+	if (!mlxsw_pci->napi_dev_tx)
+		return -ENOMEM;
+	strscpy(mlxsw_pci->napi_dev_tx->name, "mlxsw_tx",
+		sizeof(mlxsw_pci->napi_dev_tx->name));
+
+	mlxsw_pci->napi_dev_rx = alloc_netdev_dummy(0);
+	if (!mlxsw_pci->napi_dev_rx) {
+		err = -ENOMEM;
+		goto err_alloc_rx;
+	}
+	strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx",
+		sizeof(mlxsw_pci->napi_dev_rx->name));
+	dev_set_threaded(mlxsw_pci->napi_dev_rx, true);
+
+	return 0;
+
+err_alloc_rx:
+	free_netdev(mlxsw_pci->napi_dev_tx);
+	return err;
+}
+
+static void mlxsw_pci_napi_devs_fini(struct mlxsw_pci *mlxsw_pci)
+{
+	free_netdev(mlxsw_pci->napi_dev_rx);
+	free_netdev(mlxsw_pci->napi_dev_tx);
 }
 
 static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q,
@@ -187,25 +218,6 @@ mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci,
 	return &mlxsw_pci->queues[q_type];
 }
 
-static u8 __mlxsw_pci_queue_count(struct mlxsw_pci *mlxsw_pci,
-				  enum mlxsw_pci_queue_type q_type)
-{
-	struct mlxsw_pci_queue_type_group *queue_group;
-
-	queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_type);
-	return queue_group->count;
-}
-
-static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci)
-{
-	return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ);
-}
-
-static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci)
-{
-	return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ);
-}
-
 static struct mlxsw_pci_queue *
 __mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci,
 		      enum mlxsw_pci_queue_type q_type, u8 q_num)
@@ -220,23 +232,16 @@ static struct mlxsw_pci_queue *mlxsw_pci_sdq_get(struct mlxsw_pci *mlxsw_pci,
 				     MLXSW_PCI_QUEUE_TYPE_SDQ, q_num);
 }
 
-static struct mlxsw_pci_queue *mlxsw_pci_rdq_get(struct mlxsw_pci *mlxsw_pci,
-						 u8 q_num)
-{
-	return __mlxsw_pci_queue_get(mlxsw_pci,
-				     MLXSW_PCI_QUEUE_TYPE_RDQ, q_num);
-}
-
 static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci,
 						u8 q_num)
 {
 	return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ, q_num);
 }
 
-static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci,
-						u8 q_num)
+static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci)
 {
-	return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, q_num);
+	/* There is only one EQ at index 0. */
+	return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, 0);
 }
 
 static void __mlxsw_pci_queue_doorbell_set(struct mlxsw_pci *mlxsw_pci,
@@ -291,7 +296,9 @@ static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q,
 static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 			      struct mlxsw_pci_queue *q)
 {
+	struct mlxsw_pci_queue *cq;
 	int tclass;
+	u8 cq_num;
 	int lp;
 	int i;
 	int err;
@@ -304,7 +311,8 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	     MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE;
 
 	/* Set CQ of same number of this SDQ. */
-	mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num);
+	cq_num = q->num;
+	mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, cq_num);
 	mlxsw_cmd_mbox_sw2hw_dq_sdq_lp_set(mbox, lp);
 	mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass);
 	mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
@@ -317,6 +325,9 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	err = mlxsw_cmd_sw2hw_sdq(mlxsw_pci->core, mbox, q->num);
 	if (err)
 		return err;
+
+	cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num);
+	cq->u.cq.dq = q;
 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
 	return 0;
 }
@@ -327,6 +338,29 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
 	mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num);
 }
 
+#define MLXSW_PCI_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
+
+#define MLXSW_PCI_RX_BUF_SW_OVERHEAD		\
+		(MLXSW_PCI_SKB_HEADROOM +	\
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+static void
+mlxsw_pci_wqe_rx_frag_set(struct mlxsw_pci *mlxsw_pci, struct page *page,
+			  char *wqe, int index, size_t frag_len)
+{
+	dma_addr_t mapaddr;
+
+	mapaddr = page_pool_get_dma_addr(page);
+
+	if (index == 0) {
+		mapaddr += MLXSW_PCI_SKB_HEADROOM;
+		frag_len = frag_len - MLXSW_PCI_RX_BUF_SW_OVERHEAD;
+	}
+
+	mlxsw_pci_wqe_address_set(wqe, index, mapaddr);
+	mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
+}
+
 static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
 				  int index, char *frag_data, size_t frag_len,
 				  int direction)
@@ -356,51 +390,158 @@ static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
 	dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction);
 }
 
-static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
-				   struct mlxsw_pci_queue_elem_info *elem_info,
-				   gfp_t gfp)
+static struct sk_buff *mlxsw_pci_rdq_build_skb(struct mlxsw_pci_queue *q,
+					       struct page *pages[],
+					       u16 byte_count)
 {
-	size_t buf_len = MLXSW_PORT_MAX_MTU;
-	char *wqe = elem_info->elem;
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
+	unsigned int linear_data_size;
+	struct page_pool *page_pool;
 	struct sk_buff *skb;
-	int err;
+	int page_index = 0;
+	bool linear_only;
+	void *data;
+
+	linear_only = byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD <= PAGE_SIZE;
+	linear_data_size = linear_only ? byte_count :
+					 PAGE_SIZE -
+					 MLXSW_PCI_RX_BUF_SW_OVERHEAD;
+
+	page_pool = cq->u.cq.page_pool;
+	page_pool_dma_sync_for_cpu(page_pool, pages[page_index],
+				   MLXSW_PCI_SKB_HEADROOM, linear_data_size);
+
+	data = page_address(pages[page_index]);
+	net_prefetch(data);
+
+	skb = napi_build_skb(data, PAGE_SIZE);
+	if (unlikely(!skb))
+		return ERR_PTR(-ENOMEM);
+
+	skb_reserve(skb, MLXSW_PCI_SKB_HEADROOM);
+	skb_put(skb, linear_data_size);
+
+	if (linear_only)
+		return skb;
 
-	skb = __netdev_alloc_skb_ip_align(NULL, buf_len, gfp);
-	if (!skb)
+	byte_count -= linear_data_size;
+	page_index++;
+
+	while (byte_count > 0) {
+		unsigned int frag_size;
+		struct page *page;
+
+		page = pages[page_index];
+		frag_size = min(byte_count, PAGE_SIZE);
+		page_pool_dma_sync_for_cpu(page_pool, page, 0, frag_size);
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				page, 0, frag_size, PAGE_SIZE);
+		byte_count -= frag_size;
+		page_index++;
+	}
+
+	return skb;
+}
+
+static int mlxsw_pci_rdq_page_alloc(struct mlxsw_pci_queue *q,
+				    struct mlxsw_pci_queue_elem_info *elem_info,
+				    int index)
+{
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
+	char *wqe = elem_info->elem;
+	struct page *page;
+
+	page = page_pool_dev_alloc_pages(cq->u.cq.page_pool);
+	if (unlikely(!page))
 		return -ENOMEM;
 
-	err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
-				     buf_len, DMA_FROM_DEVICE);
-	if (err)
-		goto err_frag_map;
+	mlxsw_pci_wqe_rx_frag_set(q->pci, page, wqe, index, PAGE_SIZE);
+	elem_info->pages[index] = page;
+	return 0;
+}
+
+static void mlxsw_pci_rdq_page_free(struct mlxsw_pci_queue *q,
+				    struct mlxsw_pci_queue_elem_info *elem_info,
+				    int index)
+{
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
+
+	page_pool_put_page(cq->u.cq.page_pool, elem_info->pages[index], -1,
+			   false);
+}
+
+static u8 mlxsw_pci_num_sg_entries_get(u16 byte_count)
+{
+	return DIV_ROUND_UP(byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD,
+			    PAGE_SIZE);
+}
+
+static int
+mlxsw_pci_elem_info_pages_ref_store(const struct mlxsw_pci_queue *q,
+				    const struct mlxsw_pci_queue_elem_info *el,
+				    u16 byte_count, struct page *pages[],
+				    u8 *p_num_sg_entries)
+{
+	u8 num_sg_entries;
+	int i;
 
-	elem_info->u.rdq.skb = skb;
+	num_sg_entries = mlxsw_pci_num_sg_entries_get(byte_count);
+	if (WARN_ON_ONCE(num_sg_entries > q->pci->num_sg_entries))
+		return -EINVAL;
+
+	for (i = 0; i < num_sg_entries; i++)
+		pages[i] = el->pages[i];
+
+	*p_num_sg_entries = num_sg_entries;
 	return 0;
+}
+
+static int
+mlxsw_pci_rdq_pages_alloc(struct mlxsw_pci_queue *q,
+			  struct mlxsw_pci_queue_elem_info *elem_info,
+			  u8 num_sg_entries)
+{
+	struct page *old_pages[MLXSW_PCI_WQE_SG_ENTRIES];
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
+	int i, err;
+
+	for (i = 0; i < num_sg_entries; i++) {
+		old_pages[i] = elem_info->pages[i];
+		err = mlxsw_pci_rdq_page_alloc(q, elem_info, i);
+		if (err) {
+			dev_err_ratelimited(&q->pci->pdev->dev, "Failed to alloc page\n");
+			goto err_page_alloc;
+		}
+	}
+
+	return 0;
+
+err_page_alloc:
+	for (i--; i >= 0; i--)
+		page_pool_recycle_direct(cq->u.cq.page_pool, old_pages[i]);
 
-err_frag_map:
-	dev_kfree_skb_any(skb);
 	return err;
 }
 
-static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci,
-				   struct mlxsw_pci_queue_elem_info *elem_info)
+static void
+mlxsw_pci_rdq_pages_recycle(struct mlxsw_pci_queue *q, struct page *pages[],
+			    u8 num_sg_entries)
 {
-	struct sk_buff *skb;
-	char *wqe;
-
-	skb = elem_info->u.rdq.skb;
-	wqe = elem_info->elem;
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
	int i;
 
-	mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
-	dev_kfree_skb_any(skb);
+	for (i = 0; i < num_sg_entries; i++)
+		page_pool_recycle_direct(cq->u.cq.page_pool, pages[i]);
 }
 
 static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 			      struct mlxsw_pci_queue *q)
 {
 	struct mlxsw_pci_queue_elem_info *elem_info;
-	u8 sdq_count = mlxsw_pci_sdq_count(mlxsw_pci);
-	int i;
+	u8 sdq_count = mlxsw_pci->num_sdqs;
+	struct mlxsw_pci_queue *cq;
+	u8 cq_num;
+	int i, j;
 	int err;
 
 	q->producer_counter = 0;
@@ -409,7 +550,8 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	/* Set CQ of same number of this RDQ with base
 	 * above SDQ count as the lower ones are assigned to SDQs.
 	 */
-	mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, sdq_count + q->num);
+	cq_num = sdq_count + q->num;
+	mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, cq_num);
 	mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
 	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
 		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
@@ -421,14 +563,21 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	if (err)
 		return err;
 
+	cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num);
+	cq->u.cq.dq = q;
+	q->u.rdq.cq = cq;
+
 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
 
 	for (i = 0; i < q->count; i++) {
 		elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
 		BUG_ON(!elem_info);
-		err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_KERNEL);
-		if (err)
-			goto rollback;
+
+		for (j = 0; j < mlxsw_pci->num_sg_entries; j++) {
+			err = mlxsw_pci_rdq_page_alloc(q, elem_info, j);
+			if (err)
+				goto rollback;
+		}
 		/* Everything is set up, ring doorbell to pass elem to HW */
 		q->producer_counter++;
 		mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
@@ -439,8 +588,12 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 rollback:
 	for (i--; i >= 0; i--) {
 		elem_info = mlxsw_pci_queue_elem_info_get(q, i);
-		mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
+		for (j--; j >= 0; j--)
+			mlxsw_pci_rdq_page_free(q, elem_info, j);
+		j = mlxsw_pci->num_sg_entries;
 	}
+	q->u.rdq.cq = NULL;
+	cq->u.cq.dq = NULL;
 	mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
 
 	return err;
@@ -450,12 +603,13 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
 			       struct mlxsw_pci_queue *q)
 {
 	struct mlxsw_pci_queue_elem_info *elem_info;
-	int i;
+	int i, j;
 
 	mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
 	for (i = 0; i < q->count; i++) {
 		elem_info = mlxsw_pci_queue_elem_info_get(q, i);
-		mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
+		for (j = 0; j < mlxsw_pci->num_sg_entries; j++)
+			mlxsw_pci_rdq_page_free(q, elem_info, j);
 	}
 }
 
@@ -465,54 +619,11 @@ static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci,
 	q->u.cq.v = mlxsw_pci->max_cqe_ver;
 
 	if (q->u.cq.v == MLXSW_PCI_CQE_V2 &&
-	    q->num < mlxsw_pci->num_sdq_cqs &&
+	    q->num < mlxsw_pci->num_sdqs &&
 	    !mlxsw_core_sdq_supports_cqe_v2(mlxsw_pci->core))
 		q->u.cq.v = MLXSW_PCI_CQE_V1;
 }
 
-static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
-			     struct mlxsw_pci_queue *q)
-{
-	int i;
-	int err;
-
-	q->consumer_counter = 0;
-
-	for (i = 0; i < q->count; i++) {
-		char *elem = mlxsw_pci_queue_elem_get(q, i);
-
-		mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1);
-	}
-
-	if (q->u.cq.v == MLXSW_PCI_CQE_V1)
-		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
-				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1);
-	else if (q->u.cq.v == MLXSW_PCI_CQE_V2)
-		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
-				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2);
-
-	mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
-	mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
-	mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
-	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
-		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
-
-		mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr);
-	}
-	err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
-	if (err)
-		return err;
-	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
-	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
-	return 0;
-}
-
-static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
-			      struct mlxsw_pci_queue *q)
-{
-	mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
-}
-
 static unsigned int mlxsw_pci_read32_off(struct mlxsw_pci *mlxsw_pci,
 					 ptrdiff_t off)
 {
@@ -543,7 +654,7 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 				     struct mlxsw_pci_queue *q,
 				     u16 consumer_counter_limit,
 				     enum mlxsw_pci_cqe_v cqe_v,
-				     char *cqe)
+				     char *cqe, int budget)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
 	struct mlxsw_pci_queue_elem_info *elem_info;
@@ -554,8 +665,8 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 
 	spin_lock(&q->lock);
 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
-	tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info;
-	skb = elem_info->u.sdq.skb;
+	tx_info = mlxsw_skb_cb(elem_info->sdq.skb)->tx_info;
+	skb = elem_info->sdq.skb;
 	wqe = elem_info->elem;
 	for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
 		mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
@@ -569,8 +680,8 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 	}
 
 	if (skb)
-		dev_kfree_skb_any(skb);
-	elem_info->u.sdq.skb = NULL;
+		napi_consume_skb(skb, budget);
+	elem_info->sdq.skb = NULL;
 
 	if (q->consumer_counter++ != consumer_counter_limit)
 		dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in SDQ\n");
@@ -627,32 +738,46 @@ static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe)
 }
 
 static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
+				     struct napi_struct *napi,
 				     struct mlxsw_pci_queue *q,
 				     u16 consumer_counter_limit,
 				     enum mlxsw_pci_cqe_v cqe_v, char *cqe)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
+	struct page *pages[MLXSW_PCI_WQE_SG_ENTRIES];
 	struct mlxsw_pci_queue_elem_info *elem_info;
 	struct mlxsw_rx_info rx_info = {};
-	char wqe[MLXSW_PCI_WQE_SIZE];
 	struct sk_buff *skb;
+	u8 num_sg_entries;
 	u16 byte_count;
 	int err;
 
 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
-	skb = elem_info->u.rdq.skb;
-	memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);
 
 	if (q->consumer_counter++ != consumer_counter_limit)
 		dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
 
-	err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_ATOMIC);
-	if (err) {
-		dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
+	if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
+		byte_count -= ETH_FCS_LEN;
+
+	err = mlxsw_pci_elem_info_pages_ref_store(q, elem_info, byte_count,
+						  pages, &num_sg_entries);
+	if (err)
+		goto out;
+
+	err = mlxsw_pci_rdq_pages_alloc(q, elem_info, num_sg_entries);
+	if (err)
+		goto out;
+
+	skb = mlxsw_pci_rdq_build_skb(q, pages, byte_count);
+	if (IS_ERR(skb)) {
+		dev_err_ratelimited(&pdev->dev, "Failed to build skb for RDQ\n");
+		mlxsw_pci_rdq_pages_recycle(q, pages, num_sg_entries);
 		goto out;
 	}
 
-	mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+	skb_mark_for_recycle(skb);
 
 	if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
 		rx_info.is_lag = true;
@@ -684,17 +809,12 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 	}
 
 	mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe);
+	mlxsw_skb_cb(skb)->rx_md_info.napi = napi;
 
-	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
-	if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
-		byte_count -= ETH_FCS_LEN;
-	skb_put(skb, byte_count);
 	mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
 
 out:
-	/* Everything is set up, ring doorbell to pass elem to HW */
 	q->producer_counter++;
-	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
 	return;
 }
 
@@ -714,13 +834,86 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
 	return elem;
 }
 
-static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t)
+static bool mlxsw_pci_cq_cqe_to_handle(struct mlxsw_pci_queue *q)
+{
+	struct mlxsw_pci_queue_elem_info *elem_info;
+	bool owner_bit;
+
+	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+	owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem_info->elem);
+	return !mlxsw_pci_elem_hw_owned(q, owner_bit);
+}
+
+static int mlxsw_pci_napi_poll_cq_rx(struct napi_struct *napi, int budget)
 {
-	struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet);
+	struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue,
+						 u.cq.napi);
+	struct mlxsw_pci_queue *rdq = q->u.cq.dq;
 	struct mlxsw_pci *mlxsw_pci = q->pci;
+	int work_done = 0;
+	char *cqe;
+
+	/* If the budget is 0, Rx processing should be skipped. */
+	if (unlikely(!budget))
+		return 0;
+
+	while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
+		u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
+		u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
+		u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
+
+		if (unlikely(sendq)) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		if (unlikely(dqn != rdq->num)) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		mlxsw_pci_cqe_rdq_handle(mlxsw_pci, napi, rdq,
+					 wqe_counter, q->u.cq.v, cqe);
+
+		if (++work_done == budget)
+			break;
+	}
+
+	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, rdq);
+
+	if (work_done < budget)
+		goto processing_completed;
+
+	/* The driver still has outstanding work to do, budget was exhausted.
+	 * Return exactly budget. In that case, the NAPI instance will be polled
+	 * again.
+	 */
+	if (mlxsw_pci_cq_cqe_to_handle(q))
+		goto out;
+
+	/* The driver processed all the completions and handled exactly
+	 * 'budget'. Return 'budget - 1' to distinguish from the case that
+	 * driver still has completions to handle.
+	 */
+	if (work_done == budget)
+		work_done--;
+
+processing_completed:
+	if (napi_complete_done(napi, work_done))
+		mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+out:
+	return work_done;
+}
+
+static int mlxsw_pci_napi_poll_cq_tx(struct napi_struct *napi, int budget)
+{
+	struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue,
+						 u.cq.napi);
+	struct mlxsw_pci_queue *sdq = q->u.cq.dq;
+	struct mlxsw_pci *mlxsw_pci = q->pci;
+	int work_done = 0;
 	char *cqe;
-	int items = 0;
-	int credits = q->count >> 1;
 
 	while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
 		u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
@@ -728,46 +921,113 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t)
 		u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
 		u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
 		char ncqe[MLXSW_PCI_CQE_SIZE_MAX];
 
+		if (unlikely(!sendq)) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		if (unlikely(dqn != sdq->num)) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
 		memcpy(ncqe, cqe, q->elem_size);
 		mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
 
-		if (sendq) {
-			struct mlxsw_pci_queue *sdq;
+		mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq,
+					 wqe_counter, q->u.cq.v, ncqe, budget);
 
-			sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn);
-			mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq,
-						 wqe_counter, q->u.cq.v, ncqe);
-			q->u.cq.comp_sdq_count++;
-		} else {
-			struct mlxsw_pci_queue *rdq;
+		work_done++;
+	}
 
-			rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn);
-			mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq,
-						 wqe_counter, q->u.cq.v, ncqe);
-			q->u.cq.comp_rdq_count++;
-		}
-		if (++items == credits)
-			break;
+	/* If the budget is 0 napi_complete_done() should never be called. */
+	if (unlikely(!budget))
+		goto processing_completed;
+
+	work_done = min(work_done, budget - 1);
+	if (unlikely(!napi_complete_done(napi, work_done)))
+		goto out;
+
+processing_completed:
+	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+out:
+	return work_done;
+}
+
+static enum mlxsw_pci_cq_type
+mlxsw_pci_cq_type(const struct mlxsw_pci *mlxsw_pci,
+		  const struct mlxsw_pci_queue *q)
+{
+	/* Each CQ is mapped to one DQ. The first 'num_sdqs' queues are used
+	 * for SDQs and the rest are used for RDQs.
+	 */
+	if (q->num < mlxsw_pci->num_sdqs)
+		return MLXSW_PCI_CQ_SDQ;
+
+	return MLXSW_PCI_CQ_RDQ;
+}
+
+static void mlxsw_pci_cq_napi_setup(struct mlxsw_pci_queue *q,
+				    enum mlxsw_pci_cq_type cq_type)
+{
+	struct mlxsw_pci *mlxsw_pci = q->pci;
+
+	switch (cq_type) {
+	case MLXSW_PCI_CQ_SDQ:
+		netif_napi_add(mlxsw_pci->napi_dev_tx, &q->u.cq.napi,
+			       mlxsw_pci_napi_poll_cq_tx);
+		break;
+	case MLXSW_PCI_CQ_RDQ:
+		netif_napi_add(mlxsw_pci->napi_dev_rx, &q->u.cq.napi,
+			       mlxsw_pci_napi_poll_cq_rx);
+		break;
 	}
-	if (items)
-		mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
 }
 
-static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q)
+static void mlxsw_pci_cq_napi_teardown(struct mlxsw_pci_queue *q)
 {
-	return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT :
-					       MLXSW_PCI_CQE01_COUNT;
+	netif_napi_del(&q->u.cq.napi);
 }
 
-static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q)
+static int mlxsw_pci_cq_page_pool_init(struct mlxsw_pci_queue *q,
+				       enum mlxsw_pci_cq_type cq_type)
 {
-	return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE :
-					       MLXSW_PCI_CQE01_SIZE;
+	struct page_pool_params pp_params = {};
+	struct mlxsw_pci *mlxsw_pci = q->pci;
+	struct page_pool *page_pool;
+
+	if (cq_type != MLXSW_PCI_CQ_RDQ)
+		return 0;
+
+	pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+	pp_params.pool_size = MLXSW_PCI_WQE_COUNT * mlxsw_pci->num_sg_entries;
+	pp_params.nid = dev_to_node(&mlxsw_pci->pdev->dev);
+	pp_params.dev = &mlxsw_pci->pdev->dev;
+	pp_params.napi = &q->u.cq.napi;
+	pp_params.dma_dir = DMA_FROM_DEVICE;
+	pp_params.max_len = PAGE_SIZE;
+
+	page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(page_pool))
+		return PTR_ERR(page_pool);
+
+	q->u.cq.page_pool = page_pool;
+	return 0;
 }
 
-static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+static void mlxsw_pci_cq_page_pool_fini(struct mlxsw_pci_queue *q,
+					enum mlxsw_pci_cq_type cq_type)
+{
+	if (cq_type != MLXSW_PCI_CQ_RDQ)
+		return;
+
+	page_pool_destroy(q->u.cq.page_pool);
+}
+
+static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 			     struct mlxsw_pci_queue *q)
 {
+	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
 	int i;
 	int err;
@@ -776,39 +1036,64 @@ static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	for (i = 0; i < q->count; i++) {
 		char *elem = mlxsw_pci_queue_elem_get(q, i);
 
-		mlxsw_pci_eqe_owner_set(elem, 1);
+		mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1);
 	}
 
-	mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */
-	mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */
-	mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count));
+	if (q->u.cq.v == MLXSW_PCI_CQE_V1)
+		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1);
+	else if (q->u.cq.v == MLXSW_PCI_CQE_V2)
+		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2);
+
+	mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
+	mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
+	mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
 	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
 		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
 
-		mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr);
+		mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr);
 	}
-	err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num);
+	err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
 	if (err)
 		return err;
+	mlxsw_pci_cq_napi_setup(q, cq_type);
+
+	err = mlxsw_pci_cq_page_pool_init(q, cq_type);
+	if (err)
+		goto err_page_pool_init;
+
+	napi_enable(&q->u.cq.napi);
 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
 	return 0;
+
+err_page_pool_init:
+	mlxsw_pci_cq_napi_teardown(q);
+	return err;
 }
 
-static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci,
+static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
 			      struct mlxsw_pci_queue *q)
 {
-	mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num);
+	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
+
+	napi_disable(&q->u.cq.napi);
+	mlxsw_pci_cq_page_pool_fini(q, cq_type);
+	mlxsw_pci_cq_napi_teardown(q);
+	mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
+}
+
+static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q)
+{
+	return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT :
+					       MLXSW_PCI_CQE01_COUNT;
 }
 
-static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe)
+static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q)
 {
-	mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe);
-	mlxsw_pci->cmd.comp.out_param =
-		((u64) mlxsw_pci_eqe_cmd_out_param_h_get(eqe)) << 32 |
-		mlxsw_pci_eqe_cmd_out_param_l_get(eqe);
-	mlxsw_pci->cmd.wait_done = true;
-	wake_up(&mlxsw_pci->cmd.wait);
+	return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE :
+					       MLXSW_PCI_CQE01_SIZE;
 }
 
 static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
@@ -829,54 +1114,81 @@ static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
 
 static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t)
 {
-	struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet);
-	struct mlxsw_pci *mlxsw_pci = q->pci;
-	u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci);
 	unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)];
-	char *eqe;
-	u8 cqn;
-	bool cq_handle = false;
-	int items = 0;
+	struct mlxsw_pci_queue *q = from_tasklet(q, t, u.eq.tasklet);
+	struct mlxsw_pci *mlxsw_pci = q->pci;
 	int credits = q->count >> 1;
+	u8 cqn, cq_count;
+	int items = 0;
+	char *eqe;
 
 	memset(&active_cqns, 0, sizeof(active_cqns));
 
 	while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) {
+		cqn = mlxsw_pci_eqe_cqn_get(eqe);
+		set_bit(cqn, active_cqns);
 
-		/* Command interface completion events are always received on
-		 * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events
-		 * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1).
-		 */
-		switch (q->num) {
-		case MLXSW_PCI_EQ_ASYNC_NUM:
-			mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe);
-			q->u.eq.ev_cmd_count++;
-			break;
-		case MLXSW_PCI_EQ_COMP_NUM:
-			cqn = mlxsw_pci_eqe_cqn_get(eqe);
-			set_bit(cqn, active_cqns);
-			cq_handle = true;
-			q->u.eq.ev_comp_count++;
-			break;
-		default:
-			q->u.eq.ev_other_count++;
-		}
 		if (++items == credits)
 			break;
 	}
 
-	if (items) {
-		mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
-		mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
-	}
-
-	if (!cq_handle)
+	if (!items)
 		return;
+
+	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+
+	cq_count = mlxsw_pci->num_cqs;
 	for_each_set_bit(cqn, active_cqns, cq_count) {
 		q = mlxsw_pci_cq_get(mlxsw_pci, cqn);
-		mlxsw_pci_queue_tasklet_schedule(q);
+		napi_schedule(&q->u.cq.napi);
 	}
 }
 
+static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+			     struct mlxsw_pci_queue *q)
+{
+	int i;
+	int err;
+
+	/* We expect to initialize only one EQ, which gets num=0 as it is
+	 * located at index zero. We use the EQ as EQ1, so set the number for
+	 * future use.
+	 */
+	WARN_ON_ONCE(q->num);
+	q->num = MLXSW_PCI_EQ_COMP_NUM;
+
+	q->consumer_counter = 0;
+
+	for (i = 0; i < q->count; i++) {
+		char *elem = mlxsw_pci_queue_elem_get(q, i);
+
+		mlxsw_pci_eqe_owner_set(elem, 1);
+	}
+
+	mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */
+	mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */
+	mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count));
+	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
+		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
+
+		mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr);
+	}
+	err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num);
+	if (err)
+		return err;
+	tasklet_setup(&q->u.eq.tasklet, mlxsw_pci_eq_tasklet);
+	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+	return 0;
+}
+
+static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci,
+			      struct mlxsw_pci_queue *q)
+{
+	mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num);
+}
+
 struct mlxsw_pci_queue_ops {
 	const char *name;
 	enum mlxsw_pci_queue_type type;
@@ -886,7 +1198,6 @@ struct mlxsw_pci_queue_ops {
 		    struct mlxsw_pci_queue *q);
 	void (*fini)(struct mlxsw_pci *mlxsw_pci,
 		     struct mlxsw_pci_queue *q);
-	void (*tasklet)(struct tasklet_struct *t);
 	u16 (*elem_count_f)(const struct mlxsw_pci_queue *q);
 	u8 (*elem_size_f)(const struct mlxsw_pci_queue *q);
 	u16 elem_count;
@@ -914,7 +1225,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = {
 	.pre_init	= mlxsw_pci_cq_pre_init,
 	.init		= mlxsw_pci_cq_init,
 	.fini		= mlxsw_pci_cq_fini,
-	.tasklet	= mlxsw_pci_cq_tasklet,
 	.elem_count_f	= mlxsw_pci_cq_elem_count,
 	.elem_size_f	= mlxsw_pci_cq_elem_size
 };
@@ -923,7 +1233,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = {
 	.type		= MLXSW_PCI_QUEUE_TYPE_EQ,
 	.init		= mlxsw_pci_eq_init,
 	.fini		= mlxsw_pci_eq_fini,
-	.tasklet	= mlxsw_pci_eq_tasklet,
 	.elem_count	= MLXSW_PCI_EQE_COUNT,
 	.elem_size	= MLXSW_PCI_EQE_SIZE
 };
@@ -948,9 +1257,6 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	q->type = q_ops->type;
 	q->pci = mlxsw_pci;
 
-	if (q_ops->tasklet)
-		tasklet_setup(&q->tasklet, q_ops->tasklet);
-
 	mem_item->size = MLXSW_PCI_AQ_SIZE;
 	mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev,
 					   mem_item->size, &mem_item->mapaddr,
@@ -1074,7 +1380,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
 
 	if (num_sdqs + num_rdqs > num_cqs ||
 	    num_sdqs < MLXSW_PCI_SDQS_MIN ||
-	    num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) {
+	    num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_MAX) {
		dev_err(&pdev->dev, "Unsupported number of queues\n");
 		return -EINVAL;
 	}
@@ -1089,10 +1395,11 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
 		return -EINVAL;
 	}
 
-	mlxsw_pci->num_sdq_cqs = num_sdqs;
+	mlxsw_pci->num_cqs = num_cqs;
+	mlxsw_pci->num_sdqs = num_sdqs;
 
 	err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops,
-					 num_eqs);
+					 MLXSW_PCI_EQS_COUNT);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to initialize event queues\n");
 		return err;
@@ -1119,8 +1426,6 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
 		goto err_rdqs_init;
 	}
 
-	/* We have to poll in command interface until queues are initialized */
-	mlxsw_pci->cmd.nopoll = true;
 	return 0;
 
 err_rdqs_init:
@@ -1134,7 +1439,6 @@ err_cqs_init:
 
 static void mlxsw_pci_aqs_fini(struct mlxsw_pci *mlxsw_pci)
 {
-	mlxsw_pci->cmd.nopoll = false;
 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_rdq_ops);
 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops);
 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops);
@@ -1432,12 +1736,9 @@ static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id)
 {
 	struct mlxsw_pci *mlxsw_pci = dev_id;
 	struct mlxsw_pci_queue *q;
-	int i;
 
-	for (i = 0; i < MLXSW_PCI_EQS_COUNT; i++) {
-		q = mlxsw_pci_eq_get(mlxsw_pci, i);
-		mlxsw_pci_queue_tasklet_schedule(q);
-	}
+	q = mlxsw_pci_eq_get(mlxsw_pci);
+	tasklet_schedule(&q->u.eq.tasklet);
 	return IRQ_HANDLED;
 }
 
@@ -1490,20 +1791,31 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
 	return -EBUSY;
 }
 
-static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
+static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci,
+					  bool pci_reset_sbr_supported)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
 	char mrsr_pl[MLXSW_REG_MRSR_LEN];
+	struct pci_dev *bridge;
 	int err;
 
+	if (!pci_reset_sbr_supported) {
+		pci_dbg(pdev, "Performing PCI hot reset instead of \"all reset\"\n");
+		goto sbr;
+	}
+
 	mlxsw_reg_mrsr_pack(mrsr_pl,
 			    MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
 	err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
 	if (err)
 		return err;
 
+sbr:
 	device_lock_assert(&pdev->dev);
 
+	bridge = pci_upstream_bridge(pdev);
+	if (bridge)
+		pci_cfg_access_lock(bridge);
 	pci_cfg_access_lock(pdev);
 	pci_save_state(pdev);
 
@@ -1513,6 +1825,8 @@ static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
 
 	pci_restore_state(pdev);
 	pci_cfg_access_unlock(pdev);
+	if (bridge)
+		pci_cfg_access_unlock(bridge);
 
 	return err;
 }
@@ -1529,8 +1843,9 @@ static int
 mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
+	bool pci_reset_sbr_supported = false;
 	char mcam_pl[MLXSW_REG_MCAM_LEN];
-	bool pci_reset_supported;
+	bool pci_reset_supported = false;
 	u32 sys_status;
 	int err;
 
@@ -1548,15 +1863,17 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
 	mlxsw_reg_mcam_pack(mcam_pl,
 			    MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
 	err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
-			      &pci_reset_supported);
+	if (!err) {
+		mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
+				      &pci_reset_supported);
+		mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET_SBR,
+				      &pci_reset_sbr_supported);
+	}
 
 	if (pci_reset_supported) {
 		pci_dbg(pdev, "Starting PCI reset flow\n");
-		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
+		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci,
						     pci_reset_sbr_supported);
 	} else {
 		pci_dbg(pdev, "Starting software reset flow\n");
 		err = mlxsw_pci_reset_sw(mlxsw_pci);
@@ -1589,6 +1906,17 @@ static void mlxsw_pci_free_irq_vectors(struct mlxsw_pci *mlxsw_pci)
 	pci_free_irq_vectors(mlxsw_pci->pdev);
 }
 
+static void mlxsw_pci_num_sg_entries_set(struct mlxsw_pci *mlxsw_pci)
+{
+	u8 num_sg_entries;
+
+	num_sg_entries = mlxsw_pci_num_sg_entries_get(MLXSW_PORT_MAX_MTU);
+	mlxsw_pci->num_sg_entries = min(num_sg_entries,
+					MLXSW_PCI_WQE_SG_ENTRIES);
+
+	WARN_ON(num_sg_entries > MLXSW_PCI_WQE_SG_ENTRIES);
+}
+
 static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 			  const struct mlxsw_config_profile *profile,
 			  struct mlxsw_res *res)
@@ -1711,6 +2039,12 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 	if (err)
 		goto err_requery_resources;
 
+	mlxsw_pci_num_sg_entries_set(mlxsw_pci);
+
+	err = mlxsw_pci_napi_devs_init(mlxsw_pci);
+	if (err)
+		goto err_napi_devs_init;
+
 	err = mlxsw_pci_aqs_init(mlxsw_pci, mbox);
 	if (err)
 		goto err_aqs_init;
@@ -1728,6 +2062,8 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 err_request_eq_irq:
 	mlxsw_pci_aqs_fini(mlxsw_pci);
 err_aqs_init:
+	mlxsw_pci_napi_devs_fini(mlxsw_pci);
+err_napi_devs_init:
 err_requery_resources:
 err_config_profile:
 err_cqe_v_check:
@@ -1755,15 +2091,49 @@ static void mlxsw_pci_fini(void *bus_priv)
 
 	free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci);
 	mlxsw_pci_aqs_fini(mlxsw_pci);
+	mlxsw_pci_napi_devs_fini(mlxsw_pci);
 	mlxsw_pci_fw_area_fini(mlxsw_pci);
 	mlxsw_pci_free_irq_vectors(mlxsw_pci);
 }
 
+static int mlxsw_pci_txhdr_construct(struct sk_buff *skb,
+				     const struct mlxsw_txhdr_info *txhdr_info)
+{
+	const struct mlxsw_tx_info tx_info = txhdr_info->tx_info;
+	char *txhdr;
+
+	if (skb_cow_head(skb, MLXSW_TXHDR_LEN))
+		return -ENOMEM;
+
+	txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
+	memset(txhdr, 0, MLXSW_TXHDR_LEN);
+
+	mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
+	mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
+	mlxsw_tx_hdr_swid_set(txhdr, 0);
+
+	if (unlikely(txhdr_info->data)) {
+		u16 fid = txhdr_info->max_fid + tx_info.local_port - 1;
+
+		mlxsw_tx_hdr_rx_is_router_set(txhdr, true);
+		mlxsw_tx_hdr_fid_valid_set(txhdr, true);
+		mlxsw_tx_hdr_fid_set(txhdr, fid);
+		mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_DATA);
+	} else {
+		mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
+		mlxsw_tx_hdr_control_tclass_set(txhdr, 1);
+		mlxsw_tx_hdr_port_mid_set(txhdr, tx_info.local_port);
+		mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
+	}
+
+	return 0;
+}
+
 static struct mlxsw_pci_queue *
 mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci,
 		   const struct mlxsw_tx_info *tx_info)
 {
-	u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1;
+	u8 ctl_sdq_count = mlxsw_pci->num_sdqs - 1;
 	u8 sdqn;
 
 	if (tx_info->is_emad) {
@@ -1786,7 +2156,7 @@ static bool mlxsw_pci_skb_transmit_busy(void *bus_priv,
 }
 
 static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
-				  const struct mlxsw_tx_info *tx_info)
+				  const struct mlxsw_txhdr_info *txhdr_info)
 {
 	struct mlxsw_pci *mlxsw_pci = bus_priv;
 	struct mlxsw_pci_queue *q;
@@ -1795,13 +2165,17 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
 	int i;
 	int err;
 
+	err = mlxsw_pci_txhdr_construct(skb, txhdr_info);
+	if (err)
+		return err;
+
 	if (skb_shinfo(skb)->nr_frags > MLXSW_PCI_WQE_SG_ENTRIES - 1) {
 		err = skb_linearize(skb);
 		if (err)
 			return err;
 	}
 
-	q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info);
+	q = mlxsw_pci_sdq_pick(mlxsw_pci, &txhdr_info->tx_info);
 	spin_lock_bh(&q->lock);
 	elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
 	if (!elem_info) {
@@ -1809,8 +2183,8 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
 		err = -EAGAIN;
 		goto unlock;
 	}
-	mlxsw_skb_cb(skb)->tx_info = *tx_info;
-	elem_info->u.sdq.skb = skb;
+	mlxsw_skb_cb(skb)->tx_info = txhdr_info->tx_info;
+	elem_info->sdq.skb = skb;
 
 	wqe = elem_info->elem;
 	mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */
@@ -1862,9 +2236,9 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
 {
 	struct mlxsw_pci *mlxsw_pci = bus_priv;
 	dma_addr_t in_mapaddr = 0, out_mapaddr = 0;
-	bool evreq = mlxsw_pci->cmd.nopoll;
 	unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
-	bool *p_wait_done = &mlxsw_pci->cmd.wait_done;
+	unsigned long end;
+	bool wait_done;
 	int err;
 
 	*p_status = MLXSW_CMD_STATUS_OK;
@@ -1888,36 +2262,28 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
 	mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod);
 	mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0);
-	*p_wait_done = false;
+	wait_done = false;
 
 	wmb(); /* all needs to be written before we write control register */
 	mlxsw_pci_write32(mlxsw_pci, CIR_CTRL,
 			  MLXSW_PCI_CIR_CTRL_GO_BIT |
-			  (evreq ? MLXSW_PCI_CIR_CTRL_EVREQ_BIT : 0) |
 			  (opcode_mod << MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT) |
 			  opcode);
 
-	if (!evreq) {
-		unsigned long end;
-
-		end = jiffies + timeout;
-		do {
-			u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL);
+	end = jiffies + timeout;
+	do {
+		u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL);
 
-			if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) {
-				*p_wait_done = true;
-				*p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT;
-				break;
-			}
-			cond_resched();
-		} while (time_before(jiffies, end));
-	} else {
-		wait_event_timeout(mlxsw_pci->cmd.wait, *p_wait_done, timeout);
-		*p_status = mlxsw_pci->cmd.comp.status;
-	}
+		if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) {
+			wait_done = true;
+			*p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT;
+			break;
+		}
+		cond_resched();
+	} while (time_before(jiffies, end));
 
 	err = 0;
-	if (*p_wait_done) {
+	if (wait_done) {
 		if (*p_status)
 			err = -EIO;
 	} else {
@@ -1931,14 +2297,12 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
 			 */
 			__be32 tmp;
 
-			if (!evreq) {
-				tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
-								   CIR_OUT_PARAM_HI));
-				memcpy(out_mbox, &tmp, sizeof(tmp));
-				tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
-								   CIR_OUT_PARAM_LO));
-				memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp));
-			}
+			tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
+							   CIR_OUT_PARAM_HI));
+			memcpy(out_mbox, &tmp, sizeof(tmp));
+			tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
+							   CIR_OUT_PARAM_LO));
+			memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp));
 		} else if (!err && out_mbox) {
 			memcpy(out_mbox, mlxsw_pci->cmd.out_mbox.buf, out_mbox_size);
 		}
@@ -2017,7 +2381,6 @@ static int mlxsw_pci_cmd_init(struct mlxsw_pci *mlxsw_pci)
 	int err;
 
 	mutex_init(&mlxsw_pci->cmd.lock);
-	init_waitqueue_head(&mlxsw_pci->cmd.wait);
 
 	err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
 	if (err)
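
Both CQ poll functions in this diff follow the standard NAPI completion contract rather than the old tasklet credit scheme: a poll callback returns the full budget only when work remains, reports strictly less than the budget to napi_complete_done() once the queue is drained, and re-arms its interrupt source (here, the CQ doorbell) only when that call returns true. A minimal sketch of the same contract follows, for illustration only; the my_hw_*() helpers are hypothetical stand-ins, not mlxsw or kernel APIs:

/* Illustration of the poll contract used by mlxsw_pci_napi_poll_cq_rx()
 * and mlxsw_pci_napi_poll_cq_tx() above. The my_hw_*() helpers are
 * hypothetical; everything else is the standard NAPI API.
 */
#include <linux/netdevice.h>

static bool my_hw_pending(struct napi_struct *napi);	/* hypothetical */
static void my_hw_handle_one(struct napi_struct *napi);	/* hypothetical */
static void my_hw_irq_rearm(struct napi_struct *napi);	/* hypothetical */

static int my_poll(struct napi_struct *napi, int budget)
{
	int work_done = 0;

	/* A zero budget means Tx-only processing (e.g. netpoll): do no Rx
	 * work and never call napi_complete_done().
	 */
	if (unlikely(!budget))
		return 0;

	while (work_done < budget && my_hw_pending(napi)) {
		my_hw_handle_one(napi);
		work_done++;
	}

	/* Budget exhausted with work still queued: return exactly 'budget'
	 * so the instance is polled again.
	 */
	if (work_done == budget && my_hw_pending(napi))
		return budget;

	/* Fully drained: report strictly less than 'budget', and re-arm
	 * the interrupt only if NAPI really completed (napi_complete_done()
	 * returns false when a concurrent napi_schedule() won the race).
	 */
	work_done = min(work_done, budget - 1);
	if (napi_complete_done(napi, work_done))
		my_hw_irq_rearm(napi);

	return work_done;
}

Clamping to budget - 1 in the drained case is what lets the NAPI core tell "finished" apart from "ran out of budget"; both poll functions in the diff apply the same clamp before completing.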