From 599d49de7f69cb5a23e913db24e168ba2f09bd05 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 11 Aug 2015 08:48:49 -0700 Subject: dmaengine: ioatdma: move dma prep functions to single location Move all DMA descriptor prepping functions to prep.c file. Fixup all broken bits caused by the move. Signed-off-by: Dave Jiang Acked-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/ioat/Makefile | 2 +- drivers/dma/ioat/dma.c | 47 --- drivers/dma/ioat/dma.h | 81 +++--- drivers/dma/ioat/dma_v3.c | 659 +----------------------------------------- drivers/dma/ioat/init.c | 22 +- drivers/dma/ioat/prep.c | 707 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 769 insertions(+), 749 deletions(-) create mode 100644 drivers/dma/ioat/prep.c diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile index f785f8f42f7d..3a7e66464d0c 100644 --- a/drivers/dma/ioat/Makefile +++ b/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-y := init.o dma.o dma_v3.o dca.o sysfs.o +ioatdma-y := init.o dma.o dma_v3.o prep.o dca.o sysfs.o diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 5d78cafdd3f2..e67eda055ea5 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -578,50 +578,3 @@ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) return -ENOMEM; } - -struct dma_async_tx_descriptor * -ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - dma_addr_t dst = dma_dest; - dma_addr_t src = dma_src; - size_t total_len = len; - int num_descs, idx, i; - - num_descs = ioat_xferlen_to_descs(ioat_chan, len); - if (likely(num_descs) && - ioat_check_space_lock(ioat_chan, num_descs) == 0) - idx = ioat_chan->head; - else - return NULL; - i = 0; - do { - size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log); - - desc = ioat_get_ring_ent(ioat_chan, idx + i); - hw = desc->hw; - - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - dump_desc_dbg(ioat_chan, desc); - } while (++i < num_descs); - - desc->txd.flags = flags; - desc->len = total_len; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - dump_desc_dbg(ioat_chan, desc); - /* we leave the channel locked to ensure in order submission */ - - return &desc->txd; -} diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index d2ffa5775d53..a319befad1a3 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -37,6 +37,14 @@ #define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80) +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) + /* * workaround for IOAT ver.3.0 null descriptor issue * (channel returns error when size is 0) @@ -190,15 +198,22 @@ struct ioat_ring_ent { struct ioat_sed_ent *sed; }; +extern const struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; +extern struct kobj_type ioat_ktype; +extern 
struct kmem_cache *ioat_cache; +extern int ioat_ring_max_alloc_order; +extern struct kmem_cache *ioat_sed_cache; + static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c) { return container_of(c, struct ioatdma_chan, dma_chan); } - - /* wrapper around hardware descriptor format + additional software fields */ - #ifdef DEBUG #define set_desc_id(desc, i) ((desc)->id = (i)) #define desc_id(desc) ((desc)->id) @@ -381,13 +396,10 @@ ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr) ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); } -irqreturn_t ioat_dma_do_interrupt(int irq, void *data); -irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data); -struct ioat_ring_ent ** -ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags); -void ioat_start_null_desc(struct ioatdma_chan *ioat_chan); -void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan); -int ioat_reset_hw(struct ioatdma_chan *ioat_chan); +/* IOAT Prep functions */ +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); struct dma_async_tx_descriptor * ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags); struct dma_async_tx_descriptor * @@ -412,53 +424,38 @@ struct dma_async_tx_descriptor * ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + +/* IOAT Operation functions */ +irqreturn_t ioat_dma_do_interrupt(int irq, void *data); +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data); +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags); +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan); +int ioat_reset_hw(struct ioatdma_chan *ioat_chan); enum dma_status ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, struct dma_tx_state *txstate); void ioat_cleanup_event(unsigned long data); void ioat_timer_event(unsigned long data); -bool is_bwd_ioat(struct pci_dev *pdev); -int ioat_probe(struct ioatdma_device *ioat_dma); -int ioat_register(struct ioatdma_device *ioat_dma); -int ioat_dma_self_test(struct ioatdma_device *ioat_dma); -void ioat_dma_remove(struct ioatdma_device *ioat_dma); -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -void ioat_init_channel(struct ioatdma_device *ioat_dma, - struct ioatdma_chan *ioat_chan, int idx); enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, struct dma_tx_state *txstate); bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan, dma_addr_t *phys_complete); -void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type); -void ioat_kobject_del(struct ioatdma_device *ioat_dma); -int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma); -void ioat_stop(struct ioatdma_chan *ioat_chan); -int ioat_dma_probe(struct ioatdma_device *ioat_dma, int dca); -int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs); -int ioat_enumerate_channels(struct ioatdma_device *ioat_dma); -struct dma_async_tx_descriptor * -ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags); void ioat_issue_pending(struct dma_chan *chan); -int 
ioat_alloc_chan_resources(struct dma_chan *c); -void ioat_free_chan_resources(struct dma_chan *c); -void __ioat_restart_chan(struct ioatdma_chan *ioat_chan); bool reshape_ring(struct ioatdma_chan *ioat, int order); void __ioat_issue_pending(struct ioatdma_chan *ioat_chan); void ioat_timer_event(unsigned long data); int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo); int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo); +void __ioat_restart_chan(struct ioatdma_chan *ioat_chan); -extern const struct sysfs_ops ioat_sysfs_ops; -extern struct ioat_sysfs_entry ioat_version_attr; -extern struct ioat_sysfs_entry ioat_cap_attr; -extern int ioat_pending_level; -extern int ioat_ring_alloc_order; -extern struct kobj_type ioat_ktype; -extern struct kmem_cache *ioat_cache; -extern int ioat_ring_max_alloc_order; -extern struct kmem_cache *ioat_sed_cache; - +/* IOAT Init functions */ +bool is_bwd_ioat(struct pci_dev *pdev); +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *ioat_dma); +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma); +void ioat_stop(struct ioatdma_chan *ioat_chan); +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index f6a194a3a463..d0ae8f7c97a6 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -62,107 +62,8 @@ #include "hw.h" #include "dma.h" -/* ioat hardware assumes at least two sources for raid operations */ -#define src_cnt_to_sw(x) ((x) + 2) -#define src_cnt_to_hw(x) ((x) - 2) -#define ndest_to_sw(x) ((x) + 1) -#define ndest_to_hw(x) ((x) - 1) -#define src16_cnt_to_sw(x) ((x) + 9) -#define src16_cnt_to_hw(x) ((x) - 9) - -/* provide a lookup table for setting the source address in the base or - * extended descriptor of an xor or pq descriptor - */ -static const u8 xor_idx_to_desc = 0xe0; -static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc = 0xf8; -static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2 }; -static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; -static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6 }; - static void ioat3_eh(struct ioatdma_chan *ioat_chan); -static void xor_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, int idx) -{ - struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; - - raw->field[xor_idx_to_field[idx]] = addr + offset; -} - -static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) -{ - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - return raw->field[pq_idx_to_field[idx]]; -} - -static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) -{ - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - return raw->field[pq16_idx_to_field[idx]]; -} - -static void pq_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, u8 coef, int idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - raw->field[pq_idx_to_field[idx]] = addr + offset; - pq->coef[idx] = coef; -} - -static void pq16_set_src(struct ioat_raw_descriptor *desc[3], - dma_addr_t addr, u32 offset, u8 coef, unsigned idx) -{ - struct ioat_pq_descriptor *pq = (struct 
ioat_pq_descriptor *)desc[0]; - struct ioat_pq16a_descriptor *pq16 = - (struct ioat_pq16a_descriptor *)desc[1]; - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - raw->field[pq16_idx_to_field[idx]] = addr + offset; - - if (idx < 8) - pq->coef[idx] = coef; - else - pq16->coef[idx - 8] = coef; -} - -static struct ioat_sed_ent * -ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool) -{ - struct ioat_sed_ent *sed; - gfp_t flags = __GFP_ZERO | GFP_ATOMIC; - - sed = kmem_cache_alloc(ioat_sed_cache, flags); - if (!sed) - return NULL; - - sed->hw_pool = hw_pool; - sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool], - flags, &sed->dma); - if (!sed->hw) { - kmem_cache_free(ioat_sed_cache, sed); - return NULL; - } - - return sed; -} - -static void -ioat3_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed) -{ - if (!sed) - return; - - dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); - kmem_cache_free(ioat_sed_cache, sed); -} - static bool desc_has_ext(struct ioat_ring_ent *desc) { struct ioat_dma_descriptor *hw = desc->hw; @@ -184,6 +85,16 @@ static bool desc_has_ext(struct ioat_ring_ent *desc) return false; } +static void +ioat3_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed) +{ + if (!sed) + return; + + dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(ioat_sed_cache, sed); +} + static u64 ioat3_get_current_completion(struct ioatdma_chan *ioat_chan) { u64 phys_complete; @@ -530,556 +441,6 @@ ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, return dma_cookie_status(c, cookie, txstate); } -static struct dma_async_tx_descriptor * -__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, - dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, - size_t len, unsigned long flags) -{ - struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex = NULL; - struct ioat_dma_descriptor *hw; - int num_descs, with_ext, idx, i; - u32 offset = 0; - u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; - - BUG_ON(src_cnt < 2); - - num_descs = ioat_xferlen_to_descs(ioat_chan, len); - /* we need 2x the number of descriptors to cover greater than 5 - * sources - */ - if (src_cnt > 5) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. 
- */ - if (likely(num_descs) && - ioat_check_space_lock(ioat_chan, num_descs+1) == 0) - idx = ioat_chan->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, - len, 1 << ioat_chan->xfercap_log); - int s; - - desc = ioat_get_ring_ent(ioat_chan, idx + i); - xor = desc->xor; - - /* save a branch by unconditionally retrieving the - * extended descriptor xor_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat_get_ring_ent(ioat_chan, idx + i + 1); - xor_ex = ext->xor_ex; - - descs[0] = (struct ioat_raw_descriptor *) xor; - descs[1] = (struct ioat_raw_descriptor *) xor_ex; - for (s = 0; s < src_cnt; s++) - xor_set_src(descs, src[s], offset, s); - xor->size = xfer_size; - xor->dst_addr = dest + offset; - xor->ctl = 0; - xor->ctl_f.op = op; - xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); - - len -= xfer_size; - offset += xfer_size; - dump_desc_dbg(ioat_chan, desc); - } while ((i += 1 + with_ext) < num_descs); - - /* last xor descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* completion descriptor carries interrupt bit */ - compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat_chan, compl_desc); - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -struct dma_async_tx_descriptor * -ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); -} - -struct dma_async_tx_descriptor * -ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - return __ioat_prep_xor_lock(chan, result, src[0], &src[1], - src_cnt - 1, len, flags); -} - -static void -dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc, - struct ioat_ring_ent *ext) -{ - struct device *dev = to_dev(ioat_chan); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; - struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; - int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) (pq_ex ? pq_ex->next : pq->next), - desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? 
"" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq_get_src(descs, i), pq->coef[i]); - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); - dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); -} - -static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan, - struct ioat_ring_ent *desc) -{ - struct device *dev = to_dev(ioat_chan); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_raw_descriptor *descs[] = { (void *)pq, - (void *)pq, - (void *)pq }; - int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - if (desc->sed) { - descs[1] = (void *)desc->sed->hw; - descs[2] = (void *)desc->sed->hw + 64; - } - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) pq->next, - desc->txd.flags, pq->size, pq->ctl, - pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) { - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq16_get_src(descs, i), - pq->coef[i]); - } - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); -} - -static struct dma_async_tx_descriptor * -__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - struct ioat_pq_ext_descriptor *pq_ex = NULL; - struct ioat_dma_descriptor *hw; - u32 offset = 0; - u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; - int i, s, idx, with_ext, num_descs; - int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0; - - dev_dbg(to_dev(ioat_chan), "%s\n", __func__); - /* the engine requires at least two sources (we provide - * at least 1 implied source in the DMA_PREP_CONTINUE case) - */ - BUG_ON(src_cnt + dmaf_continue(flags) < 2); - - num_descs = ioat_xferlen_to_descs(ioat_chan, len); - /* we need 2x the number of descriptors to cover greater than 3 - * sources (we need 1 extra source in the q-only continuation - * case and 3 extra sources in the p+q continuation case. - */ - if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || - (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. 
- */ - if (likely(num_descs) && - ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0) - idx = ioat_chan->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, - 1 << ioat_chan->xfercap_log); - - desc = ioat_get_ring_ent(ioat_chan, idx + i); - pq = desc->pq; - - /* save a branch by unconditionally retrieving the - * extended descriptor pq_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext); - pq_ex = ext->pq_ex; - - descs[0] = (struct ioat_raw_descriptor *) pq; - descs[1] = (struct ioat_raw_descriptor *) pq_ex; - - for (s = 0; s < src_cnt; s++) - pq_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq_set_src(descs, dst[0], offset, 0, s++); - pq_set_src(descs, dst[1], offset, 1, s++); - pq_set_src(descs, dst[1], offset, 0, s++); - } - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - /* we turn on descriptor write back error status */ - if (ioat_dma->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 1 : 0; - pq->ctl_f.src_cnt = src_cnt_to_hw(s); - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while ((i += 1 + with_ext) < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - dump_pq_desc_dbg(ioat_chan, desc, ext); - - if (!cb32) { - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - compl_desc = desc; - } else { - /* completion descriptor carries interrupt bit */ - compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat_chan, compl_desc); - } - - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; - struct ioat_ring_ent *desc; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - u32 offset = 0; - u8 op; - int i, s, idx, num_descs; - - /* this function is only called with 9-16 sources */ - op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; - - dev_dbg(to_dev(ioat_chan), "%s\n", __func__); - - num_descs = ioat_xferlen_to_descs(ioat_chan, len); - - /* - * 16 source pq is only available on cb3.3 and has no completion - * write hw bug. 
- */ - if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0) - idx = ioat_chan->head; - else - return NULL; - - i = 0; - - do { - struct ioat_raw_descriptor *descs[4]; - size_t xfer_size = min_t(size_t, len, - 1 << ioat_chan->xfercap_log); - - desc = ioat_get_ring_ent(ioat_chan, idx + i); - pq = desc->pq; - - descs[0] = (struct ioat_raw_descriptor *) pq; - - desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3); - if (!desc->sed) { - dev_err(to_dev(ioat_chan), - "%s: no free sed entries\n", __func__); - return NULL; - } - - pq->sed_addr = desc->sed->dma; - desc->sed->parent = desc; - - descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; - descs[2] = (void *)descs[1] + 64; - - for (s = 0; s < src_cnt; s++) - pq16_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq16_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq16_set_src(descs, dst[0], offset, 0, s++); - pq16_set_src(descs, dst[1], offset, 1, s++); - pq16_set_src(descs, dst[1], offset, 0, s++); - } - - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - pq->ctl_f.src_cnt = src16_cnt_to_hw(s); - /* we turn on descriptor write back error status */ - if (ioat_dma->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 1 : 0; - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while (++i < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* with cb3.3 we should be able to do completion w/o a null desc */ - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - - dump_pq16_desc_dbg(ioat_chan, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) -{ - if (dmaf_p_disabled_continue(flags)) - return src_cnt + 1; - else if (dmaf_continue(flags)) - return src_cnt + 3; - else - return src_cnt; -} - -struct dma_async_tx_descriptor * -ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - dst[0] = dst[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - dst[1] = dst[0]; - - /* handle the single source multiply case from the raid6 - * recovery path - */ - if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { - dma_addr_t single_source[2]; - unsigned char single_source_coef[2]; - - BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); - single_source[0] = src[0]; - single_source[1] = src[0]; - single_source_coef[0] = scf[0]; - single_source_coef[1] = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat_prep_pq16_lock(chan, NULL, dst, single_source, - 2, single_source_coef, len, - flags) : - __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - - } else { - return src_cnt_flags(src_cnt, flags) > 8 ? 
- __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags) : - __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags); - } -} - -struct dma_async_tx_descriptor * -ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - enum sum_check_flags *pqres, unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - pq[0] = pq[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - pq[1] = pq[0]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *pqres = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags) : - __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); -} - -struct dma_async_tx_descriptor * -ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - memset(scf, 0, src_cnt); - pq[0] = dst; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = dst; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags) : - __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); -} - -struct dma_async_tx_descriptor * -ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - memset(scf, 0, src_cnt); - pq[0] = src[0]; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = pq[0]; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? 
- __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags) : - __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags); -} - -struct dma_async_tx_descriptor * -ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) -{ - struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat_check_space_lock(ioat_chan, 1) == 0) - desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); - else - return NULL; - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - - desc->txd.flags = flags; - desc->len = 1; - - dump_desc_dbg(ioat_chan, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - static int ioat3_irq_reinit(struct ioatdma_device *ioat_dma) { struct pci_dev *pdev = ioat_dma->pdev; diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index de8141c7cd01..6b8fd49cf718 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -110,6 +110,9 @@ MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); static void ioat_remove(struct pci_dev *pdev); +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx); static int ioat_dca_enabled = 1; module_param(ioat_dca_enabled, int, 0644); @@ -269,7 +272,7 @@ static void ioat_dma_test_callback(void *dma_async_param) * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. * @ioat_dma: dma device to be tested */ -int ioat_dma_self_test(struct ioatdma_device *ioat_dma) +static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) { int i; u8 *src; @@ -453,7 +456,6 @@ err_no_irq: dev_err(dev, "no usable interrupts\n"); return err; } -EXPORT_SYMBOL(ioat_dma_setup_interrupts); static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma) { @@ -461,7 +463,7 @@ static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma) writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); } -int ioat_probe(struct ioatdma_device *ioat_dma) +static int ioat_probe(struct ioatdma_device *ioat_dma) { int err = -ENODEV; struct dma_device *dma = &ioat_dma->dma_dev; @@ -517,7 +519,7 @@ err_dma_pool: return err; } -int ioat_register(struct ioatdma_device *ioat_dma) +static int ioat_register(struct ioatdma_device *ioat_dma) { int err = dma_async_device_register(&ioat_dma->dma_dev); @@ -530,7 +532,7 @@ int ioat_register(struct ioatdma_device *ioat_dma) return err; } -void ioat_dma_remove(struct ioatdma_device *ioat_dma) +static void ioat_dma_remove(struct ioatdma_device *ioat_dma) { struct dma_device *dma = &ioat_dma->dma_dev; @@ -550,7 +552,7 @@ void ioat_dma_remove(struct ioatdma_device *ioat_dma) * ioat_enumerate_channels - find and initialize the device's channels * @ioat_dma: the ioat dma device to be enumerated */ -int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) +static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) { struct ioatdma_chan *ioat_chan; struct device *dev = &ioat_dma->pdev->dev; @@ -593,7 +595,7 @@ int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) * ioat_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned */ -void ioat_free_chan_resources(struct dma_chan *c) +static void 
ioat_free_chan_resources(struct dma_chan *c) { struct ioatdma_chan *ioat_chan = to_ioat_chan(c); struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; @@ -646,7 +648,7 @@ void ioat_free_chan_resources(struct dma_chan *c) /* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring * @chan: channel to be initialized */ -int ioat_alloc_chan_resources(struct dma_chan *c) +static int ioat_alloc_chan_resources(struct dma_chan *c) { struct ioatdma_chan *ioat_chan = to_ioat_chan(c); struct ioat_ring_ent **ring; @@ -712,7 +714,7 @@ int ioat_alloc_chan_resources(struct dma_chan *c) } /* common channel initialization */ -void +static void ioat_init_channel(struct ioatdma_device *ioat_dma, struct ioatdma_chan *ioat_chan, int idx) { @@ -1048,7 +1050,7 @@ static void ioat3_intr_quirk(struct ioatdma_device *ioat_dma) } } -int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) +static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) { struct pci_dev *pdev = ioat_dma->pdev; int dca_en = system_has_dca_enabled(pdev); diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c new file mode 100644 index 000000000000..e323a4036908 --- /dev/null +++ b/drivers/dma/ioat/prep.c @@ -0,0 +1,707 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". 
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
+				       2, 2, 2, 2, 2, 2, 2 };
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
+					0, 1, 2, 3, 4, 5, 6 };
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+			dma_addr_t addr, u32 offset, int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+	raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	return raw->field[pq_idx_to_field[idx]];
+}
+
+static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
+{
+	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+	return raw->field[pq16_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+		       dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	raw->field[pq_idx_to_field[idx]] = addr + offset;
+	pq->coef[idx] = coef;
+}
+
+static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
+			dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
+	struct ioat_pq16a_descriptor *pq16 =
+		(struct ioat_pq16a_descriptor *)desc[1];
+	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+	raw->field[pq16_idx_to_field[idx]] = addr + offset;
+
+	if (idx < 8)
+		pq->coef[idx] = coef;
+	else
+		pq16->coef[idx - 8] = coef;
+}
+
+static struct ioat_sed_ent *
+ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
+{
+	struct ioat_sed_ent *sed;
+	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
+
+	sed = kmem_cache_alloc(ioat_sed_cache, flags);
+	if (!sed)
+		return NULL;
+
+	sed->hw_pool = hw_pool;
+	sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
+				 flags, &sed->dma);
+	if (!sed->hw) {
+		kmem_cache_free(ioat_sed_cache, sed);
+		return NULL;
+	}
+
+	return sed;
+}
+
+struct dma_async_tx_descriptor *
+ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+			   dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioat_dma_descriptor *hw;
+	struct ioat_ring_ent *desc;
+	dma_addr_t dst = dma_dest;
+	dma_addr_t src = dma_src;
+	size_t total_len = len;
+	int num_descs, idx, i;
+
+	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+	if (likely(num_descs) &&
+	    ioat_check_space_lock(ioat_chan, num_descs) == 0)
+		idx = ioat_chan->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);
+
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		hw = desc->hw;
+
+		hw->size = copy;
+		hw->ctl = 0;
+		hw->src_addr = src;
+		hw->dst_addr = dst;
+
+		len -= copy;
+		dst += copy;
+		src += copy;
+		dump_desc_dbg(ioat_chan, desc);
+	} while (++i < num_descs);
+
+	desc->txd.flags = flags;
+	desc->len = total_len;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat_chan, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + + +static struct dma_async_tx_descriptor * +__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; + u32 offset = 0; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs+1) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, + len, 1 << ioat_chan->xfercap_log); + int s; + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat_chan, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate 
success... so clear it here + */ + *result = 0; + + return __ioat_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc, + struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, + pq->ctl_f.int_en, pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); +} + +static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; + int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 
1 : 0; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. + */ + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 
1 : 0; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat_chan, desc, ext); + + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + } + + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. + */ + if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3); + if (!desc->sed) { + dev_err(to_dev(ioat_chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 
1 : 0; + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + + /* handle the single source multiply case from the raid6 + * recovery path + */ + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } +} + +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + memset(scf, 0, src_cnt); + pq[0] = dst; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? 
+ __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + pq[0] = src[0]; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (ioat_check_space_lock(ioat_chan, 1) == 0) + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + -- cgit