diff options
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c | 153 | ||||
| -rw-r--r-- | include/linux/vfio_pci_core.h | 1 |
2 files changed, 146 insertions, 8 deletions
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 207ede189c2a..9c612162653f 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -277,11 +277,100 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat return ret; } +static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev) +{ + /* + * The vdev power related flags are protected with 'memory_lock' + * semaphore. + */ + vfio_pci_zap_and_down_write_memory_lock(vdev); + if (vdev->pm_runtime_engaged) { + up_write(&vdev->memory_lock); + return -EINVAL; + } + + vdev->pm_runtime_engaged = true; + pm_runtime_put_noidle(&vdev->pdev->dev); + up_write(&vdev->memory_lock); + + return 0; +} + +static int vfio_pci_core_pm_entry(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz) +{ + struct vfio_pci_core_device *vdev = + container_of(device, struct vfio_pci_core_device, vdev); + int ret; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0); + if (ret != 1) + return ret; + + /* + * Inside vfio_pci_runtime_pm_entry(), only the runtime PM usage count + * will be decremented. The pm_runtime_put() will be invoked again + * while returning from the ioctl and then the device can go into + * runtime suspended state. + */ + return vfio_pci_runtime_pm_entry(vdev); +} + +static void __vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev) +{ + if (vdev->pm_runtime_engaged) { + vdev->pm_runtime_engaged = false; + pm_runtime_get_noresume(&vdev->pdev->dev); + } +} + +static void vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev) +{ + /* + * The vdev power related flags are protected with 'memory_lock' + * semaphore. + */ + down_write(&vdev->memory_lock); + __vfio_pci_runtime_pm_exit(vdev); + up_write(&vdev->memory_lock); +} + +static int vfio_pci_core_pm_exit(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz) +{ + struct vfio_pci_core_device *vdev = + container_of(device, struct vfio_pci_core_device, vdev); + int ret; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0); + if (ret != 1) + return ret; + + /* + * The device is always in the active state here due to pm wrappers + * around ioctls. + */ + vfio_pci_runtime_pm_exit(vdev); + return 0; +} + #ifdef CONFIG_PM static int vfio_pci_core_runtime_suspend(struct device *dev) { struct vfio_pci_core_device *vdev = dev_get_drvdata(dev); + down_write(&vdev->memory_lock); + /* + * The user can move the device into D3hot state before invoking + * power management IOCTL. Move the device into D0 state here and then + * the pci-driver core runtime PM suspend function will move the device + * into the low power state. Also, for the devices which have + * NoSoftRst-, it will help in restoring the original state + * (saved locally in 'vdev->pm_save'). + */ + vfio_pci_set_power_state(vdev, PCI_D0); + up_write(&vdev->memory_lock); + /* * If INTx is enabled, then mask INTx before going into the runtime * suspended state and unmask the same in the runtime resume. @@ -418,6 +507,18 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) /* * This function can be invoked while the power state is non-D0. + * This non-D0 power state can be with or without runtime PM. + * vfio_pci_runtime_pm_exit() will internally increment the usage + * count corresponding to pm_runtime_put() called during low power + * feature entry and then pm_runtime_resume() will wake up the device, + * if the device has already gone into the suspended state. Otherwise, + * the vfio_pci_set_power_state() will change the device power state + * to D0. + */ + vfio_pci_runtime_pm_exit(vdev); + pm_runtime_resume(&pdev->dev); + + /* * This function calls __pci_reset_function_locked() which internally * can use pci_pm_reset() for the function reset. pci_pm_reset() will * fail if the power state is non-D0. Also, for the devices which @@ -1273,6 +1374,10 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { switch (flags & VFIO_DEVICE_FEATURE_MASK) { + case VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY: + return vfio_pci_core_pm_entry(device, flags, arg, argsz); + case VFIO_DEVICE_FEATURE_LOW_POWER_EXIT: + return vfio_pci_core_pm_exit(device, flags, arg, argsz); case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN: return vfio_pci_core_feature_token(device, flags, arg, argsz); default: @@ -1285,31 +1390,47 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + int ret; if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) return -EINVAL; + ret = pm_runtime_resume_and_get(&vdev->pdev->dev); + if (ret) { + pci_info_ratelimited(vdev->pdev, "runtime resume failed %d\n", + ret); + return -EIO; + } + switch (index) { case VFIO_PCI_CONFIG_REGION_INDEX: - return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite); + ret = vfio_pci_config_rw(vdev, buf, count, ppos, iswrite); + break; case VFIO_PCI_ROM_REGION_INDEX: if (iswrite) - return -EINVAL; - return vfio_pci_bar_rw(vdev, buf, count, ppos, false); + ret = -EINVAL; + else + ret = vfio_pci_bar_rw(vdev, buf, count, ppos, false); + break; case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: - return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); + ret = vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); + break; case VFIO_PCI_VGA_REGION_INDEX: - return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); + ret = vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); + break; + default: index -= VFIO_PCI_NUM_REGIONS; - return vdev->region[index].ops->rw(vdev, buf, + ret = vdev->region[index].ops->rw(vdev, buf, count, ppos, iswrite); + break; } - return -EINVAL; + pm_runtime_put(&vdev->pdev->dev); + return ret; } ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, @@ -1504,7 +1625,11 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) mutex_lock(&vdev->vma_lock); down_read(&vdev->memory_lock); - if (!__vfio_pci_memory_enabled(vdev)) { + /* + * Memory region cannot be accessed if the low power feature is engaged + * or memory access is disabled. + */ + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { ret = VM_FAULT_SIGBUS; goto up_out; } @@ -2219,6 +2344,15 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, goto err_unlock; } + /* + * Some of the devices in the dev_set can be in the runtime suspended + * state. Increment the usage count for all the devices in the dev_set + * before reset and decrement the same after reset. + */ + ret = vfio_pci_dev_set_pm_runtime_get(dev_set); + if (ret) + goto err_unlock; + list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { /* * Test whether all the affected devices are contained by the @@ -2274,6 +2408,9 @@ err_undo: else mutex_unlock(&cur->vma_lock); } + + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) + pm_runtime_put(&cur->pdev->dev); err_unlock: mutex_unlock(&dev_set->lock); return ret; diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index a0f1f36e42a2..1025d53fde0b 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -79,6 +79,7 @@ struct vfio_pci_core_device { bool nointx; bool needs_pm_restore; bool pm_intx_masked; + bool pm_runtime_engaged; struct pci_saved_state *pci_saved_state; struct pci_saved_state *pm_save; int ioeventfds_nr; |
