// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2020-21 IBM Corp. */ #define pr_fmt(fmt) "vas: " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include "vas.h" #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul /* The hypervisor allows one credit per window right now */ #define DEF_WIN_CREDS 1 static struct vas_all_caps caps_all; static bool copypaste_feat; static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; static DEFINE_MUTEX(vas_pseries_mutex); static long hcall_return_busy_check(long rc) { /* Check if we are stalled for some time */ if (H_IS_LONG_BUSY(rc)) { msleep(get_longbusy_msecs(rc)); rc = H_BUSY; } else if (rc == H_BUSY) { cond_resched(); } return rc; } /* * Allocate VAS window hcall */ static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, u8 wintype, u16 credits) { long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; long rc; do { rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, credits, domain[0], domain[1], domain[2], domain[3], domain[4], domain[5]); rc = hcall_return_busy_check(rc); } while (rc == H_BUSY); if (rc == H_SUCCESS) { if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); return -ENOTSUPP; } win->vas_win.winid = retbuf[0]; win->win_addr = retbuf[1]; win->complete_irq = retbuf[2]; win->fault_irq = retbuf[3]; return 0; } pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", rc, wintype, credits); return -EIO; } /* * Deallocate VAS window hcall. */ static int h_deallocate_vas_window(u64 winid) { long rc; do { rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); rc = hcall_return_busy_check(rc); } while (rc == H_BUSY); if (rc == H_SUCCESS) return 0; pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", rc, winid); return -EIO; } /* * Modify VAS window. * After the window is opened with allocate window hcall, configure it * with flags and LPAR PID before using. */ static int h_modify_vas_window(struct pseries_vas_window *win) { long rc; u32 lpid = mfspr(SPRN_PID); /* * AMR value is not supported in Linux VAS implementation. * The hypervisor ignores it if 0 is passed. */ do { rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, win->vas_win.winid, lpid, 0, VAS_MOD_WIN_FLAGS, 0); rc = hcall_return_busy_check(rc); } while (rc == H_BUSY); if (rc == H_SUCCESS) return 0; pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", rc, win->vas_win.winid, lpid); return -EIO; } /* * This hcall is used to determine the capabilities from the hypervisor. * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES * @query_type: If 0 is passed, the hypervisor returns the overall * capabilities which provides all feature(s) that are * available. Then query the hypervisor to get the * corresponding capabilities for the specific feature. * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS * and VAS GZIP Default capabilities. * H_QUERY_NX_CAPABILITIES provides NX GZIP * capabilities. * @result: Return buffer to save capabilities. */ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) { long rc; rc = plpar_hcall_norets(hcall, query_type, result); if (rc == H_SUCCESS) return 0; pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", hcall, rc, query_type, result); return -EIO; } EXPORT_SYMBOL_GPL(h_query_vas_capabilities); /* * hcall to get fault CRB from the hypervisor. */ static int h_get_nx_fault(u32 winid, u64 buffer) { long rc; rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); if (rc == H_SUCCESS) return 0; pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", rc, winid, buffer); return -EIO; } /* * Handle the fault interrupt. * When the fault interrupt is received for each window, query the * hypervisor to get the fault CRB on the specific fault. Then * process the CRB by updating CSB or send signal if the user space * CSB is invalid. * Note: The hypervisor forwards an interrupt for each fault request. * So one fault CRB to process for each H_GET_NX_FAULT hcall. */ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) { struct pseries_vas_window *txwin = data; struct coprocessor_request_block crb; struct vas_user_win_ref *tsk_ref; int rc; rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); if (!rc) { tsk_ref = &txwin->vas_win.task_ref; vas_dump_crb(&crb); vas_update_csb(&crb, tsk_ref); } return IRQ_HANDLED; } /* * Allocate window and setup IRQ mapping. */ static int allocate_setup_window(struct pseries_vas_window *txwin, u64 *domain, u8 wintype) { int rc; rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); if (rc) return rc; /* * On PowerVM, the hypervisor setup and forwards the fault * interrupt per window. So the IRQ setup and fault handling * will be done for each open window separately. */ txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); if (!txwin->fault_virq) { pr_err("Failed irq mapping %d\n", txwin->fault_irq); rc = -EINVAL; goto out_win; } txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", txwin->vas_win.winid); if (!txwin->name) { rc = -ENOMEM; goto out_irq; } rc = request_threaded_irq(txwin->fault_virq, NULL, pseries_vas_fault_thread_fn, IRQF_ONESHOT, txwin->name, txwin); if (rc) { pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", txwin->vas_win.winid, txwin->fault_virq, rc); goto out_free; } txwin->vas_win.wcreds_max = DEF_WIN_CREDS; return 0; out_free: kfree(txwin->name); out_irq: irq_dispose_mapping(txwin->fault_virq); out_win: h_deallocate_vas_window(txwin->vas_win.winid); return rc; } static inline void free_irq_setup(struct pseries_vas_window *txwin) { free_irq(txwin->fault_virq, txwin); kfree(txwin->name); irq_dispose_mapping(txwin->fault_virq); } static struct vas_window *vas_allocate_window(int vas_id, u64 flags, enum vas_cop_type cop_type) { long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; struct vas_cop_feat_caps *cop_feat_caps; struct vas_caps *caps; struct pseries_vas_window *txwin; int rc; txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); if (!txwin) return ERR_PTR(-ENOMEM); /* * A VAS window can have many credits which means that many * requests can be issued simultaneously. But the hypervisor * restricts one credit per window. * The hypervisor introduces 2 different types of credits: * Default credit type (Uses normal priority FIFO): * A limited number of credits are assigned to partitions * based on processor entitlement. But these credits may be * over-committed on a system depends on whether the CPUs * are in shared or dedicated modes - that is, more requests * may be issued across the system than NX can service at * once which can result in paste command failure (RMA_busy). * Then the process has to resend requests or fall-back to * SW compression. * Quality of Service (QoS) credit type (Uses high priority FIFO): * To avoid NX HW contention, the system admins can assign * QoS credits for each LPAR so that this partition is * guaranteed access to NX resources. These credits are * assigned to partitions via the HMC. * Refer PAPR for more information. * * Allocate window with QoS credits if user requested. Otherwise * default credits are used. */ if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; else caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; cop_feat_caps = &caps->caps; if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > atomic_read(&cop_feat_caps->target_lpar_creds)) { pr_err("Credits are not available to allocate window\n"); rc = -EINVAL; goto out; } if (vas_id == -1) { /* * The user space is requesting to allocate a window on * a VAS instance where the process is executing. * On PowerVM, domain values are passed to the hypervisor * to select VAS instance. Useful if the process is * affinity to NUMA node. * The hypervisor selects VAS instance if * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. * The h_allocate_vas_window hcall is defined to take a * domain values as specified by h_home_node_associativity, * So no unpacking needs to be done. */ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, VPHN_FLAG_VCPU, smp_processor_id()); if (rc != H_SUCCESS) { pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); goto out; } } /* * Allocate / Deallocate window hcalls and setup / free IRQs * have to be protected with mutex. * Open VAS window: Allocate window hcall and setup IRQ * Close VAS window: Deallocate window hcall and free IRQ * The hypervisor waits until all NX requests are * completed before closing the window. So expects OS * to handle NX faults, means IRQ can be freed only * after the deallocate window hcall is returned. * So once the window is closed with deallocate hcall before * the IRQ is freed, it can be assigned to new allocate * hcall with the same fault IRQ by the hypervisor. It can * result in setup IRQ fail for the new window since the * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); mutex_unlock(&vas_pseries_mutex); if (rc) goto out; /* * Modify window and it is ready to use. */ rc = h_modify_vas_window(txwin); if (!rc) rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); if (rc) goto out_free; vas_user_win_add_mm_context(&txwin->vas_win.task_ref); txwin->win_type = cop_feat_caps->win_type; mutex_lock(&vas_pseries_mutex); list_add(&txwin->win_list, &caps->list); mutex_unlock(&vas_pseries_mutex); return &txwin->vas_win; out_free: /* * Window is not operational. Free IRQ before closing * window so that do not have to hold mutex. */ free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); out: atomic_dec(&cop_feat_caps->used_lpar_creds); kfree(txwin); return ERR_PTR(rc); } static u64 vas_paste_address(struct vas_window *vwin) { struct pseries_vas_window *win; win = container_of(vwin, struct pseries_vas_window, vas_win); return win->win_addr; } static int deallocate_free_window(struct pseries_vas_window *win) { int rc = 0; /* * The hypervisor waits for all requests including faults * are processed before closing the window - Means all * credits have to be returned. In the case of fault * request, a credit is returned after OS issues * H_GET_NX_FAULT hcall. * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW * hcall. */ rc = h_deallocate_vas_window(win->vas_win.winid); if (!rc) free_irq_setup(win); return rc; } static int vas_deallocate_window(struct vas_window *vwin) { struct pseries_vas_window *win; struct vas_cop_feat_caps *caps; int rc = 0; if (!vwin) return -EINVAL; win = container_of(vwin, struct pseries_vas_window, vas_win); /* Should not happen */ if (win->win_type >= VAS_MAX_FEAT_TYPE) { pr_err("Window (%u): Invalid window type %u\n", vwin->winid, win->win_type); return -EINVAL; } caps = &vascaps[win->win_type].caps; mutex_lock(&vas_pseries_mutex); rc = deallocate_free_window(win); if (rc) { mutex_unlock(&vas_pseries_mutex); return rc; } list_del(&win->win_list); atomic_dec(&caps->used_lpar_creds); mutex_unlock(&vas_pseries_mutex); put_vas_user_win_ref(&vwin->task_ref); mm_context_remove_vas_window(vwin->task_ref.mm); kfree(win); return 0; } static const struct vas_user_win_ops vops_pseries = { .open_win = vas_allocate_window, /* Open and configure window */ .paste_addr = vas_paste_address, /* To do copy/paste */ .close_win = vas_deallocate_window, /* Close window */ }; /* * Supporting only nx-gzip coprocessor type now, but this API code * extended to other coprocessor types later. */ int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, const char *name) { int rc; if (!copypaste_feat) return -ENOTSUPP; rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); return rc; } EXPORT_SYMBOL_GPL(vas_register_api_pseries); void vas_unregister_api_pseries(void) { vas_unregister_coproc_api(); } EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); /* * Get the specific capabilities based on the feature type. * Right now supports GZIP default and GZIP QoS capabilities. */ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, struct hv_vas_cop_feat_caps *hv_caps) { struct vas_cop_feat_caps *caps; struct vas_caps *vcaps; int rc = 0; vcaps = &vascaps[type]; memset(vcaps, 0, sizeof(*vcaps)); INIT_LIST_HEAD(&vcaps->list); caps = &vcaps->caps; rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, (u64)virt_to_phys(hv_caps)); if (rc) return rc; caps->user_mode = hv_caps->user_mode; if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { pr_err("User space COPY/PASTE is not supported\n"); return -ENOTSUPP; } caps->descriptor = be64_to_cpu(hv_caps->descriptor); caps->win_type = hv_caps->win_type; if (caps->win_type >= VAS_MAX_FEAT_TYPE) { pr_err("Unsupported window type %u\n", caps->win_type); return -EINVAL; } caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); atomic_set(&caps->target_lpar_creds, be16_to_cpu(hv_caps->target_lpar_creds)); if (feat == VAS_GZIP_DEF_FEAT) { caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); if (caps->max_win_creds < DEF_WIN_CREDS) { pr_err("Window creds(%u) > max allowed window creds(%u)\n", DEF_WIN_CREDS, caps->max_win_creds); return -EINVAL; } } copypaste_feat = true; return 0; } static int __init pseries_vas_init(void) { struct hv_vas_cop_feat_caps *hv_cop_caps; struct hv_vas_all_caps *hv_caps; int rc; /* * Linux supports user space COPY/PASTE only with Radix */ if (!radix_enabled()) { pr_err("API is supported only with radix page tables\n"); return -ENOTSUPP; } hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); if (!hv_caps) return -ENOMEM; /* * Get VAS overall capabilities by passing 0 to feature type. */ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, (u64)virt_to_phys(hv_caps)); if (rc) goto out; caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); if (!hv_cop_caps) { rc = -ENOMEM; goto out; } /* * QOS capabilities available */ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); if (rc) goto out_cop; } /* * Default capabilities available */ if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); if (rc) goto out_cop; } pr_info("GZIP feature is available\n"); out_cop: kfree(hv_cop_caps); out: kfree(hv_caps); return rc; } machine_device_initcall(pseries, pseries_vas_init);