Diffstat (limited to 'arch/s390')
29 files changed, 471 insertions, 538 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 938e5df75b2d..0e5fad5f06ca 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -238,6 +238,7 @@ config S390
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_PREEMPT_DYNAMIC_KEY
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE
@@ -254,6 +255,7 @@ config S390
 	select HOTPLUG_SMT
 	select IOMMU_HELPER if PCI
 	select IOMMU_SUPPORT if PCI
+	select IRQ_MSI_LIB if PCI
 	select KASAN_VMALLOC if KASAN
 	select LOCK_MM_AND_FIND_VMA
 	select MMU_GATHER_MERGE_VMAS
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index cea3de4dce8c..7d6cc4c85af0 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -16,7 +16,6 @@
 #include "decompressor.h"
 #include "boot.h"
 
-#define INVALID_PHYS_ADDR	(~(phys_addr_t)0)
 struct ctlreg __bootdata_preserved(s390_invalid_asce);
 
 #ifdef CONFIG_PROC_FS
@@ -245,22 +244,10 @@ static void *boot_crst_alloc(unsigned long val)
 
 static pte_t *boot_pte_alloc(void)
 {
-	static void *pte_leftover;
 	pte_t *pte;
 
-	/*
-	 * handling pte_leftovers this way helps to avoid memory fragmentation
-	 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
-	 */
-	if (!pte_leftover) {
-		pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
-		pte = pte_leftover + _PAGE_TABLE_SIZE;
-		__arch_set_page_dat(pte, 1);
-	} else {
-		pte = pte_leftover;
-		pte_leftover = NULL;
-	}
-
+	pte = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+	__arch_set_page_dat(pte, 1);
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
 	return pte;
 }
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index 4dc2e068e0ff..2bb7104124ca 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -22,11 +22,9 @@
 extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
 
-extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
-				       __u64 value);
+extern int hypfs_create_u64(struct dentry *dir, const char *name, __u64 value);
 
-extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
-				       char *string);
+extern int hypfs_create_str(struct dentry *dir, const char *name, char *string);
 
 /* LPAR Hypervisor */
 extern int hypfs_diag_init(void);
diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c
index 013da4ff9802..39621c326010 100644
--- a/arch/s390/hypfs/hypfs_diag_fs.c
+++ b/arch/s390/hypfs/hypfs_diag_fs.c
@@ -203,7 +203,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
 {
 	struct dentry *cpu_dir;
 	char buffer[TMP_SIZE];
-	void *rc;
+	int rc;
 
 	snprintf(buffer, TMP_SIZE, "%d",
 		 cpu_info__cpu_addr(diag204_get_info_type(), cpu_info));
@@ -213,22 +213,21 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
 	rc = hypfs_create_u64(cpu_dir, "mgmtime",
 			      cpu_info__acc_time(diag204_get_info_type(), cpu_info) -
 			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
+	if (rc)
+		return rc;
 	rc = hypfs_create_u64(cpu_dir, "cputime",
 			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
+	if (rc)
+		return rc;
 	if (diag204_get_info_type() == DIAG204_INFO_EXT) {
 		rc = hypfs_create_u64(cpu_dir, "onlinetime",
 				      cpu_info__online_time(diag204_get_info_type(), cpu_info));
-		if (IS_ERR(rc))
-			return PTR_ERR(rc);
+		if (rc)
+			return rc;
 	}
 	diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer);
-	rc = hypfs_create_str(cpu_dir, "type", buffer);
-	return PTR_ERR_OR_ZERO(rc);
+	return hypfs_create_str(cpu_dir, "type", buffer);
 }
 
 static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
@@ -263,7 +262,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
 {
 	struct dentry *cpu_dir;
 	char buffer[TMP_SIZE];
-	void *rc;
+	int rc;
 
 	snprintf(buffer, TMP_SIZE, "%i",
 		 phys_cpu__cpu_addr(diag204_get_info_type(), cpu_info));
@@ -272,11 +271,10 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
 		return PTR_ERR(cpu_dir);
 	rc = hypfs_create_u64(cpu_dir, "mgmtime",
 			      phys_cpu__mgm_time(diag204_get_info_type(), cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
+	if (rc)
+		return rc;
 	diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer);
-	rc = hypfs_create_str(cpu_dir, "type", buffer);
-	return PTR_ERR_OR_ZERO(rc);
+	return hypfs_create_str(cpu_dir, "type", buffer);
 }
 
 static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
@@ -315,41 +313,25 @@ int hypfs_diag_create_files(struct dentry *root)
 		return rc;
 	systems_dir = hypfs_mkdir(root, "systems");
-	if (IS_ERR(systems_dir)) {
-		rc = PTR_ERR(systems_dir);
-		goto err_out;
-	}
+	if (IS_ERR(systems_dir))
+		return PTR_ERR(systems_dir);
 	time_hdr = (struct x_info_blk_hdr *)buffer;
 	part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type());
 	for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) {
 		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
-		if (IS_ERR(part_hdr)) {
-			rc = PTR_ERR(part_hdr);
-			goto err_out;
-		}
+		if (IS_ERR(part_hdr))
+			return PTR_ERR(part_hdr);
 	}
 	if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) & DIAG204_LPAR_PHYS_FLG) {
 		ptr = hypfs_create_phys_files(root, part_hdr);
-		if (IS_ERR(ptr)) {
-			rc = PTR_ERR(ptr);
-			goto err_out;
-		}
+		if (IS_ERR(ptr))
+			return PTR_ERR(ptr);
 	}
 	hyp_dir = hypfs_mkdir(root, "hyp");
-	if (IS_ERR(hyp_dir)) {
-		rc = PTR_ERR(hyp_dir);
-		goto err_out;
-	}
-	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
-	if (IS_ERR(ptr)) {
-		rc = PTR_ERR(ptr);
-		goto err_out;
-	}
-	rc = 0;
-
-err_out:
-	return rc;
+	if (IS_ERR(hyp_dir))
+		return PTR_ERR(hyp_dir);
+	return hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
 }
 
 /* Diagnose 224 functions */
diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c
index 6011289afa8c..a149a9f92e40 100644
--- a/arch/s390/hypfs/hypfs_vm_fs.c
+++ b/arch/s390/hypfs/hypfs_vm_fs.c
@@ -19,10 +19,9 @@
 
 #define ATTRIBUTE(dir, name, member)			\
 do {							\
-	void *rc;					\
-	rc = hypfs_create_u64(dir, name, member);	\
-	if (IS_ERR(rc))					\
-		return PTR_ERR(rc);			\
+	int rc = hypfs_create_u64(dir, name, member);	\
+	if (rc)						\
+		return rc;				\
 } while (0)
 
 static int hypfs_vm_create_guest(struct dentry *systems_dir,
@@ -85,7 +84,7 @@ static int hypfs_vm_create_guest(struct dentry *systems_dir,
 
 int hypfs_vm_create_files(struct dentry *root)
 {
-	struct dentry *dir, *file;
+	struct dentry *dir;
 	struct diag2fc_data *data;
 	unsigned int count = 0;
 	int rc, i;
@@ -100,11 +99,9 @@ int hypfs_vm_create_files(struct dentry *root)
 		rc = PTR_ERR(dir);
 		goto failed;
 	}
-	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
-	if (IS_ERR(file)) {
-		rc = PTR_ERR(file);
+	rc = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+	if (rc)
 		goto failed;
-	}
 
 	/* physical cpus */
 	dir = hypfs_mkdir(root, "cpus");
@@ -112,11 +109,9 @@ int hypfs_vm_create_files(struct dentry *root)
 		rc = PTR_ERR(dir);
 		goto failed;
 	}
-	file = hypfs_create_u64(dir, "count", data->lcpus);
-	if (IS_ERR(file)) {
-		rc = PTR_ERR(file);
+	rc = hypfs_create_u64(dir, "count", data->lcpus);
+	if (rc)
 		goto failed;
-	}
 
 	/* guests */
 	dir = hypfs_mkdir(root, "systems");
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index ee5cfa8f71a0..3a47c2e24b6e 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -60,33 +60,17 @@ static void hypfs_update_update(struct super_block *sb)
 
 static void hypfs_add_dentry(struct dentry *dentry)
 {
-	dentry->d_fsdata = hypfs_last_dentry;
-	hypfs_last_dentry = dentry;
-}
-
-static void hypfs_remove(struct dentry *dentry)
-{
-	struct dentry *parent;
-
-	parent = dentry->d_parent;
-	inode_lock(d_inode(parent));
-	if (simple_positive(dentry)) {
-		if (d_is_dir(dentry))
-			simple_rmdir(d_inode(parent), dentry);
-		else
-			simple_unlink(d_inode(parent), dentry);
+	if (IS_ROOT(dentry->d_parent)) {
+		dentry->d_fsdata = hypfs_last_dentry;
+		hypfs_last_dentry = dentry;
 	}
-	d_drop(dentry);
-	dput(dentry);
-	inode_unlock(d_inode(parent));
 }
 
-static void hypfs_delete_tree(struct dentry *root)
+static void hypfs_delete_tree(void)
 {
 	while (hypfs_last_dentry) {
-		struct dentry *next_dentry;
-		next_dentry = hypfs_last_dentry->d_fsdata;
-		hypfs_remove(hypfs_last_dentry);
+		struct dentry *next_dentry = hypfs_last_dentry->d_fsdata;
+
+		simple_recursive_removal(hypfs_last_dentry, NULL);
 		hypfs_last_dentry = next_dentry;
 	}
 }
@@ -183,14 +167,14 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		rc = -EBUSY;
 		goto out;
 	}
-	hypfs_delete_tree(sb->s_root);
+	hypfs_delete_tree();
 	if (machine_is_vm())
 		rc = hypfs_vm_create_files(sb->s_root);
 	else
 		rc = hypfs_diag_create_files(sb->s_root);
 	if (rc) {
 		pr_err("Updating the hypfs tree failed\n");
-		hypfs_delete_tree(sb->s_root);
+		hypfs_delete_tree();
 		goto out;
 	}
 	hypfs_update_update(sb);
@@ -325,13 +309,9 @@ static void hypfs_kill_super(struct super_block *sb)
 {
 	struct hypfs_sb_info *sb_info = sb->s_fs_info;
 
-	if (sb->s_root)
-		hypfs_delete_tree(sb->s_root);
-	if (sb_info && sb_info->update_file)
-		hypfs_remove(sb_info->update_file);
-	kfree(sb->s_fs_info);
-	sb->s_fs_info = NULL;
-	kill_litter_super(sb);
+	hypfs_last_dentry = NULL;
+	kill_anon_super(sb);
+	kfree(sb_info);
 }
 
 static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
@@ -340,17 +320,13 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
 	struct dentry *dentry;
 	struct inode *inode;
 
-	inode_lock(d_inode(parent));
-	dentry = lookup_noperm(&QSTR(name), parent);
-	if (IS_ERR(dentry)) {
-		dentry = ERR_PTR(-ENOMEM);
-		goto fail;
-	}
+	dentry = simple_start_creating(parent, name);
+	if (IS_ERR(dentry))
+		return ERR_PTR(-ENOMEM);
 	inode = hypfs_make_inode(parent->d_sb, mode);
 	if (!inode) {
-		dput(dentry);
-		dentry = ERR_PTR(-ENOMEM);
-		goto fail;
+		simple_done_creating(dentry);
+		return ERR_PTR(-ENOMEM);
 	}
 	if (S_ISREG(mode)) {
 		inode->i_fop = &hypfs_file_ops;
@@ -365,11 +341,9 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
 	} else
 		BUG();
 	inode->i_private = data;
-	d_instantiate(dentry, inode);
-	dget(dentry);
-fail:
-	inode_unlock(d_inode(parent));
-	return dentry;
+	d_make_persistent(dentry, inode);
+	simple_done_creating(dentry);
+	return dentry; // borrowed
 }
 
 struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
@@ -397,8 +371,7 @@ static struct dentry *hypfs_create_update_file(struct dentry *dir)
 	return dentry;
 }
 
-struct dentry *hypfs_create_u64(struct dentry *dir,
-				const char *name, __u64 value)
+int hypfs_create_u64(struct dentry *dir, const char *name, __u64 value)
 {
 	char *buffer;
 	char tmp[TMP_SIZE];
@@ -407,35 +380,34 @@ struct dentry *hypfs_create_u64(struct dentry *dir,
 	snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value);
 	buffer = kstrdup(tmp, GFP_KERNEL);
 	if (!buffer)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	dentry = hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
 	if (IS_ERR(dentry)) {
 		kfree(buffer);
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	}
 	hypfs_add_dentry(dentry);
-	return dentry;
+	return 0;
 }
 
-struct dentry *hypfs_create_str(struct dentry *dir,
-				const char *name, char *string)
+int hypfs_create_str(struct dentry *dir, const char *name, char *string)
 {
 	char *buffer;
 	struct dentry *dentry;
 
	buffer = kmalloc(strlen(string) + 2, GFP_KERNEL);
 	if (!buffer)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	sprintf(buffer, "%s\n", string);
 	dentry = hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
 	if (IS_ERR(dentry)) {
 		kfree(buffer);
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	}
 	hypfs_add_dentry(dentry);
-	return dentry;
+	return 0;
 }
 
 static const struct file_operations hypfs_file_ops = {
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index acb4b13d98c5..ee9221bb5d18 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -4,11 +4,14 @@
 
 #include <linux/stringify.h>
 
+#ifdef CONFIG_BUG
+
 #ifndef CONFIG_DEBUG_BUGVERBOSE
 #define _BUGVERBOSE_LOCATION(file, line)
 #else
 #define __BUGVERBOSE_LOCATION(file, line)			\
 		.pushsection .rodata.str, "aMS", @progbits, 1;	\
+		.align 2;					\
 	10002:	.ascii file "\0";				\
 		.popsection;					\
 								\
@@ -52,6 +55,8 @@ do {								\
 
 #define HAVE_ARCH_BUG
 
+#endif /* CONFIG_BUG */
+
 #include <asm-generic/bug.h>
 
 #endif /* _ASM_S390_BUG_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c2ba3d4398c5..ae1223264d3c 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -146,6 +146,7 @@ struct kvm_vcpu_stat {
 	u64 instruction_diagnose_500;
 	u64 instruction_diagnose_other;
 	u64 pfault_sync;
+	u64 signal_exits;
 };
 
 #define PGM_OPERATION 0x01
@@ -631,10 +632,8 @@ struct kvm_s390_pv {
 	struct mmu_notifier mmu_notifier;
 };
 
-struct kvm_arch{
-	void *sca;
-	int use_esca;
-	rwlock_t sca_lock;
+struct kvm_arch {
+	struct esca_block *sca;
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;
 	struct kvm_device *flic;
@@ -650,6 +649,7 @@ struct kvm_arch{
 	int user_sigp;
 	int user_stsi;
 	int user_instr0;
+	int user_operexec;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 9240a363c893..c1d63b613bf9 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -166,6 +166,8 @@ static inline int page_reset_referenced(unsigned long addr)
 	return CC_TRANSFORM(cc);
 }
 
+int split_pud_page(pud_t *pudp, unsigned long addr);
+
 /* Bits int the storage key */
 #define _PAGE_CHANGED		0x02	/* HW changed bit */
 #define _PAGE_REFERENCED	0x04	/* HW referenced bit */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a32f465ecf73..c0ff19dab580 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -5,6 +5,7 @@
 #include <linux/pci.h>
 #include <linux/mutex.h>
 #include <linux/iommu.h>
+#include <linux/irqdomain.h>
 #include <linux/pci_hotplug.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
@@ -109,6 +110,7 @@ struct zpci_bus {
 	struct list_head	resources;
 	struct list_head	bus_next;
 	struct resource		bus_resource;
+	struct irq_domain	*msi_parent_domain;
 	int			topo;		/* TID if topo_is_tid, PCHID otherwise */
 	int			domain_nr;
 	u8			multifunction	: 1;
@@ -310,6 +312,9 @@ int zpci_dma_exit_device(struct zpci_dev *zdev);
 /* IRQ */
 int __init zpci_irq_init(void);
 void __init zpci_irq_exit(void);
+int zpci_set_irq(struct zpci_dev *zdev);
+int zpci_create_parent_msi_domain(struct zpci_bus *zbus);
+void zpci_remove_parent_msi_domain(struct zpci_bus *zbus);
 
 /* FMB */
 int zpci_fmb_enable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 810a6b9d9628..c9ae680a28af 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -66,6 +66,7 @@ struct stack_frame {
 			unsigned long sie_flags;
 			unsigned long sie_control_block_phys;
 			unsigned long sie_guest_asce;
+			unsigned long sie_irq;
 		};
 	};
 	unsigned long gprs[10];
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index cfe27f6579e3..e1a5b5b54e4f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -67,6 +67,7 @@ int main(void)
 	OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
 	OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
 	OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
+	OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq);
 	DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
 	BLANK();
 	OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index c360087807d8..b7f1553d9ee5 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -193,6 +193,7 @@ SYM_FUNC_START(__sie64a)
 	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r14)	# copy thread flags
 	lmg	%r0,%r13,0(%r4)			# load guest gprs 0-13
 	mvi	__TI_sie(%r14),1
+	stosm	__SF_SIE_IRQ(%r15),0x03		# enable interrupts
 	lctlg	%c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
@@ -216,6 +217,7 @@ SYM_FUNC_START(__sie64a)
 	lg	%r14,__LC_CURRENT(%r14)
 	mvi	__TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
+	stnsm	__SF_SIE_IRQ(%r15),0xfc		# disable interrupts
 	lg	%r14,__SF_SIE_SAVEAREA(%r15)	# load guest register save area
 	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	xgr	%r0,%r0				# clear guest registers to
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index cae908d64550..f4ec8c1ce214 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
 	def_tristate y
 	prompt "Kernel-based Virtual Machine (KVM) support"
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
-	select HAVE_KVM_VCPU_ASYNC_IOCTL
 	select KVM_ASYNC_PF
 	select KVM_ASYNC_PF_SYNC
 	select KVM_COMMON
@@ -30,6 +29,7 @@ config KVM
 	select HAVE_KVM_NO_POLL
 	select KVM_VFIO
 	select MMU_NOTIFIER
+	select VIRT_XFER_TO_GUEST_WORK
 	help
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 21c2e61fece4..41ca6b0ee7a9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -109,14 +109,9 @@ struct aste {
 
 int ipte_lock_held(struct kvm *kvm)
 {
-	if (sclp.has_siif) {
-		int rc;
+	if (sclp.has_siif)
+		return kvm->arch.sca->ipte_control.kh != 0;
 
-		read_lock(&kvm->arch.sca_lock);
-		rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
-		read_unlock(&kvm->arch.sca_lock);
-		return rc;
-	}
 	return kvm->arch.ipte_lock_count != 0;
 }
 
@@ -129,19 +124,16 @@ static void ipte_lock_simple(struct kvm *kvm)
 	if (kvm->arch.ipte_lock_count > 1)
 		goto out;
retry:
-	read_lock(&kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(kvm);
+	ic = &kvm->arch.sca->ipte_control;
 	old = READ_ONCE(*ic);
 	do {
 		if (old.k) {
-			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
 			goto retry;
 		}
 		new = old;
 		new.k = 1;
 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
-	read_unlock(&kvm->arch.sca_lock);
out:
 	mutex_unlock(&kvm->arch.ipte_mutex);
 }
@@ -154,14 +146,12 @@ static void ipte_unlock_simple(struct kvm *kvm)
 	kvm->arch.ipte_lock_count--;
 	if (kvm->arch.ipte_lock_count)
 		goto out;
-	read_lock(&kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(kvm);
+	ic = &kvm->arch.sca->ipte_control;
 	old = READ_ONCE(*ic);
 	do {
 		new = old;
 		new.k = 0;
 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
-	read_unlock(&kvm->arch.sca_lock);
 	wake_up(&kvm->arch.ipte_wq);
out:
 	mutex_unlock(&kvm->arch.ipte_mutex);
 }
@@ -172,12 +162,10 @@ static void ipte_lock_siif(struct kvm *kvm)
 	union ipte_control old, new, *ic;
 
retry:
-	read_lock(&kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(kvm);
+	ic = &kvm->arch.sca->ipte_control;
 	old = READ_ONCE(*ic);
 	do {
 		if (old.kg) {
-			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
 			goto retry;
 		}
@@ -185,15 +173,13 @@ retry:
 		new.k = 1;
 		new.kh++;
 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
-	read_unlock(&kvm->arch.sca_lock);
 }
 
 static void ipte_unlock_siif(struct kvm *kvm)
 {
 	union ipte_control old, new, *ic;
 
-	read_lock(&kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(kvm);
+	ic = &kvm->arch.sca->ipte_control;
 	old = READ_ONCE(*ic);
 	do {
 		new = old;
@@ -201,7 +187,6 @@ retry:
 		if (!new.kh)
 			new.k = 0;
 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
-	read_unlock(&kvm->arch.sca_lock);
 	if (!new.kh)
 		wake_up(&kvm->arch.ipte_wq);
 }
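With sca_lock gone, the ipte lock state is maintained purely by a lock-free read/modify/try_cmpxchg loop on the ipte_control word. A generic, self-contained sketch of that idiom, using a plain unsigned long with bit 0 as the lock bit instead of union ipte_control:

	/* Sketch of the retry loop used by ipte_lock_simple() above. */
	static void example_lock(unsigned long *ctrl)
	{
		unsigned long old, new;

	retry:
		old = READ_ONCE(*ctrl);
		do {
			if (old & 1UL) {	/* already locked: back off, retry */
				cond_resched();
				goto retry;
			}
			new = old | 1UL;	/* set lock bit, keep other state */
		} while (!try_cmpxchg(ctrl, &old, new));
	}

On failure try_cmpxchg() refreshes "old" with the current value, so the loop re-evaluates the lock bit without an extra READ_ONCE().
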
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index c7908950c1f4..420ae62977e2 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -471,6 +471,9 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->ipa == 0xb256)
 		return handle_sthyi(vcpu);
 
+	if (vcpu->kvm->arch.user_operexec)
+		return -EOPNOTSUPP;
+
 	if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
 		return -EOPNOTSUPP;
 	rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f55574af98cc..249cdc822ec5 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -44,70 +44,34 @@ static struct kvm_s390_gib *gib;
 /* handle external calls via sigp interpretation facility */
 static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
 {
-	int c, scn;
+	struct esca_block *sca = vcpu->kvm->arch.sca;
+	union esca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl;
 
 	if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
 		return 0;
 
 	BUG_ON(!kvm_s390_use_sca_entries());
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	if (vcpu->kvm->arch.use_esca) {
-		struct esca_block *sca = vcpu->kvm->arch.sca;
-		union esca_sigp_ctrl sigp_ctrl =
-			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
-
-		c = sigp_ctrl.c;
-		scn = sigp_ctrl.scn;
-	} else {
-		struct bsca_block *sca = vcpu->kvm->arch.sca;
-		union bsca_sigp_ctrl sigp_ctrl =
-			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
-
-		c = sigp_ctrl.c;
-		scn = sigp_ctrl.scn;
-	}
-	read_unlock(&vcpu->kvm->arch.sca_lock);
 
 	if (src_id)
-		*src_id = scn;
+		*src_id = sigp_ctrl.scn;
 
-	return c;
+	return sigp_ctrl.c;
 }
 
 static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 {
+	struct esca_block *sca = vcpu->kvm->arch.sca;
+	union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+	union esca_sigp_ctrl old_val, new_val = {.scn = src_id, .c = 1};
 	int expect, rc;
 
 	BUG_ON(!kvm_s390_use_sca_entries());
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	if (vcpu->kvm->arch.use_esca) {
-		struct esca_block *sca = vcpu->kvm->arch.sca;
-		union esca_sigp_ctrl *sigp_ctrl =
-			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union esca_sigp_ctrl new_val = {0}, old_val;
-
-		old_val = READ_ONCE(*sigp_ctrl);
-		new_val.scn = src_id;
-		new_val.c = 1;
-		old_val.c = 0;
-
-		expect = old_val.value;
-		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
-	} else {
-		struct bsca_block *sca = vcpu->kvm->arch.sca;
-		union bsca_sigp_ctrl *sigp_ctrl =
-			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union bsca_sigp_ctrl new_val = {0}, old_val;
 
-		old_val = READ_ONCE(*sigp_ctrl);
-		new_val.scn = src_id;
-		new_val.c = 1;
-		old_val.c = 0;
+	old_val = READ_ONCE(*sigp_ctrl);
+	old_val.c = 0;
 
-		expect = old_val.value;
-		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
-	}
-	read_unlock(&vcpu->kvm->arch.sca_lock);
+	expect = old_val.value;
+	rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
 
 	if (rc != expect) {
 		/* another external call is pending */
@@ -119,24 +83,14 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 
 static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 {
+	struct esca_block *sca = vcpu->kvm->arch.sca;
+	union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
 	if (!kvm_s390_use_sca_entries())
 		return;
 
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	if (vcpu->kvm->arch.use_esca) {
-		struct esca_block *sca = vcpu->kvm->arch.sca;
-		union esca_sigp_ctrl *sigp_ctrl =
-			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-
-		WRITE_ONCE(sigp_ctrl->value, 0);
-	} else {
-		struct bsca_block *sca = vcpu->kvm->arch.sca;
-		union bsca_sigp_ctrl *sigp_ctrl =
-			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-
-		WRITE_ONCE(sigp_ctrl->value, 0);
-	}
-	read_unlock(&vcpu->kvm->arch.sca_lock);
+	WRITE_ONCE(sigp_ctrl->value, 0);
 }
 
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
@@ -1223,7 +1177,7 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	if (!sclp.has_sigpif)
+	if (!kvm_s390_use_sca_entries())
 		return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 
 	return sca_ext_call_pending(vcpu, NULL);
@@ -1548,7 +1502,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
 		return -EINVAL;
 
-	if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
+	if (kvm_s390_use_sca_entries() && !kvm_s390_pv_cpu_get_handle(vcpu))
 		return sca_inject_ext_call(vcpu, src_id);
 
 	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
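Unlike the retrying ipte lock, sca_inject_ext_call() builds the desired sigp_ctrl value up front and issues a single cmpxchg(): failure does not mean "retry", it means another external call is already pending and the caller gets -EBUSY. A condensed sketch of that single-shot idiom, assuming a simplified 16-bit control word with bit 0 standing in for the "c" (call pending) bit:

	/* Sketch only: real code compares full union esca_sigp_ctrl values. */
	static int example_inject(u16 *ctrl, u16 new_val)
	{
		u16 expect = READ_ONCE(*ctrl) & ~1U;	/* expected: c bit clear */

		if (cmpxchg(ctrl, expect, new_val) != expect)
			return -EBUSY;	/* another external call is pending */
		return 0;
	}
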
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ff3a185f156c..56a50524b3ee 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -13,6 +13,7 @@
 #define pr_fmt(fmt) "kvm-s390: " fmt
 
 #include <linux/compiler.h>
+#include <linux/entry-virt.h>
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
@@ -184,7 +185,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
-	STATS_DESC_COUNTER(VCPU, pfault_sync)
+	STATS_DESC_COUNTER(VCPU, pfault_sync),
+	STATS_DESC_COUNTER(VCPU, signal_exits)
 };
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -271,7 +273,6 @@ debug_info_t *kvm_s390_dbf_uv;
 /* forward declarations */
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 			      unsigned long end);
-static int sca_switch_to_extended(struct kvm *kvm);
 
 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 {
@@ -606,6 +607,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_S390_DIAG318:
 	case KVM_CAP_IRQFD_RESAMPLE:
+	case KVM_CAP_S390_USER_OPEREXEC:
 		r = 1;
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
@@ -631,11 +633,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 	case KVM_CAP_MAX_VCPU_ID:
-		r = KVM_S390_BSCA_CPU_SLOTS;
+		/*
+		 * Return the same value for KVM_CAP_MAX_VCPUS and
+		 * KVM_CAP_MAX_VCPU_ID to conform with the KVM API.
+		 */
+		r = KVM_S390_ESCA_CPU_SLOTS;
 		if (!kvm_s390_use_sca_entries())
 			r = KVM_MAX_VCPUS;
-		else if (sclp.has_esca && sclp.has_64bscao)
-			r = KVM_S390_ESCA_CPU_SLOTS;
 		if (ext == KVM_CAP_NR_VCPUS)
 			r = min_t(unsigned int, num_online_cpus(), r);
 		break;
@@ -919,6 +923,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
 			 r ? "(not available)" : "(success)");
 		break;
+	case KVM_CAP_S390_USER_OPEREXEC:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_OPEREXEC");
+		kvm->arch.user_operexec = 1;
+		icpt_operexc_on_all_vcpus(kvm);
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1930,22 +1940,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
  * Updates the Multiprocessor Topology-Change-Report bit to signal
  * the guest with a topology change.
  * This is only relevant if the topology facility is present.
- *
- * The SCA version, bsca or esca, doesn't matter as offset is the same.
  */
 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
 {
 	union sca_utility new, old;
-	struct bsca_block *sca;
+	struct esca_block *sca;
 
-	read_lock(&kvm->arch.sca_lock);
 	sca = kvm->arch.sca;
 	old = READ_ONCE(sca->utility);
 	do {
 		new = old;
 		new.mtcr = val;
 	} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
-	read_unlock(&kvm->arch.sca_lock);
 }
 
 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
@@ -1966,9 +1972,7 @@ static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
 	if (!test_kvm_facility(kvm, 11))
 		return -ENXIO;
 
-	read_lock(&kvm->arch.sca_lock);
-	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
-	read_unlock(&kvm->arch.sca_lock);
+	topo = kvm->arch.sca->utility.mtcr;
 
 	return put_user(topo, (u8 __user *)attr->addr);
 }
@@ -2666,14 +2670,6 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		if (kvm_s390_pv_is_protected(kvm))
 			break;
 
-		/*
-		 * FMT 4 SIE needs esca. As we never switch back to bsca from
-		 * esca, we need no cleanup in the error cases below
-		 */
-		r = sca_switch_to_extended(kvm);
-		if (r)
-			break;
-
 		mmap_write_lock(kvm->mm);
 		r = gmap_helper_disable_cow_sharing();
 		mmap_write_unlock(kvm->mm);
@@ -3316,10 +3312,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm)
 
 static void sca_dispose(struct kvm *kvm)
 {
-	if (kvm->arch.use_esca)
-		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
-	else
-		free_page((unsigned long)(kvm->arch.sca));
+	free_pages_exact(kvm->arch.sca, sizeof(*kvm->arch.sca));
 	kvm->arch.sca = NULL;
 }
 
@@ -3333,10 +3326,9 @@ void kvm_arch_free_vm(struct kvm *kvm)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
-	int i, rc;
+	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
 	char debug_name[16];
-	static unsigned long sca_offset;
+	int i, rc;
 
 	rc = -EINVAL;
 #ifdef CONFIG_KVM_S390_UCONTROL
@@ -3357,20 +3349,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!sclp.has_64bscao)
 		alloc_flags |= GFP_DMA;
-	rwlock_init(&kvm->arch.sca_lock);
-	/* start with basic SCA */
-	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
-	if (!kvm->arch.sca)
-		goto out_err;
 	mutex_lock(&kvm_lock);
-	sca_offset += 16;
-	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
-		sca_offset = 0;
-	kvm->arch.sca = (struct bsca_block *)
-			((char *) kvm->arch.sca + sca_offset);
+
+	kvm->arch.sca = alloc_pages_exact(sizeof(*kvm->arch.sca), alloc_flags);
 	mutex_unlock(&kvm_lock);
+	if (!kvm->arch.sca)
+		goto out_err;
 
-	sprintf(debug_name, "kvm-%u", current->pid);
+	snprintf(debug_name, sizeof(debug_name), "kvm-%u", current->pid);
 
 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
 	if (!kvm->arch.dbf)
@@ -3547,133 +3533,38 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
 {
+	struct esca_block *sca = vcpu->kvm->arch.sca;
+
 	if (!kvm_s390_use_sca_entries())
 		return;
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	if (vcpu->kvm->arch.use_esca) {
-		struct esca_block *sca = vcpu->kvm->arch.sca;
 
-		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
-		sca->cpu[vcpu->vcpu_id].sda = 0;
-	} else {
-		struct bsca_block *sca = vcpu->kvm->arch.sca;
-
-		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
-		sca->cpu[vcpu->vcpu_id].sda = 0;
-	}
-	read_unlock(&vcpu->kvm->arch.sca_lock);
+	clear_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn);
+	sca->cpu[vcpu->vcpu_id].sda = 0;
 }
 
 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
 {
-	if (!kvm_s390_use_sca_entries()) {
-		phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
-
-		/* we still need the basic sca for the ipte control */
-		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
-		vcpu->arch.sie_block->scaol = sca_phys;
-		return;
-	}
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	if (vcpu->kvm->arch.use_esca) {
-		struct esca_block *sca = vcpu->kvm->arch.sca;
-		phys_addr_t sca_phys = virt_to_phys(sca);
-
-		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
-		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
-		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
-		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
-		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
-	} else {
-		struct bsca_block *sca = vcpu->kvm->arch.sca;
-		phys_addr_t sca_phys = virt_to_phys(sca);
-
-		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
-		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
-		vcpu->arch.sie_block->scaol = sca_phys;
-		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
-	}
-	read_unlock(&vcpu->kvm->arch.sca_lock);
-}
-
-/* Basic SCA to Extended SCA data copy routines */
-static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
-{
-	d->sda = s->sda;
-	d->sigp_ctrl.c = s->sigp_ctrl.c;
-	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
-}
-
-static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
-{
-	int i;
+	struct esca_block *sca = vcpu->kvm->arch.sca;
+	phys_addr_t sca_phys = virt_to_phys(sca);
 
-	d->ipte_control = s->ipte_control;
-	d->mcn[0] = s->mcn;
-	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
-		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
-}
+	/* we still need the sca header for the ipte control */
+	vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+	vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
+	vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
 
-static int sca_switch_to_extended(struct kvm *kvm)
-{
-	struct bsca_block *old_sca = kvm->arch.sca;
-	struct esca_block *new_sca;
-	struct kvm_vcpu *vcpu;
-	unsigned long vcpu_idx;
-	u32 scaol, scaoh;
-	phys_addr_t new_sca_phys;
-
-	if (kvm->arch.use_esca)
-		return 0;
-
-	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-	if (!new_sca)
-		return -ENOMEM;
-
-	new_sca_phys = virt_to_phys(new_sca);
-	scaoh = new_sca_phys >> 32;
-	scaol = new_sca_phys & ESCA_SCAOL_MASK;
-
-	kvm_s390_vcpu_block_all(kvm);
-	write_lock(&kvm->arch.sca_lock);
-
-	sca_copy_b_to_e(new_sca, old_sca);
-
-	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
-		vcpu->arch.sie_block->scaoh = scaoh;
-		vcpu->arch.sie_block->scaol = scaol;
-		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
-	}
-	kvm->arch.sca = new_sca;
-	kvm->arch.use_esca = 1;
-
-	write_unlock(&kvm->arch.sca_lock);
-	kvm_s390_vcpu_unblock_all(kvm);
-
-	free_page((unsigned long)old_sca);
+	if (!kvm_s390_use_sca_entries())
+		return;
 
-	VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
-		 old_sca, kvm->arch.sca);
-	return 0;
+	set_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn);
+	sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
 }
 
 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
 {
-	int rc;
-
-	if (!kvm_s390_use_sca_entries()) {
-		if (id < KVM_MAX_VCPUS)
-			return true;
-		return false;
-	}
-	if (id < KVM_S390_BSCA_CPU_SLOTS)
-		return true;
-	if (!sclp.has_esca || !sclp.has_64bscao)
-		return false;
-
-	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
+	if (!kvm_s390_use_sca_entries())
+		return id < KVM_MAX_VCPUS;
 
-	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
+	return id < KVM_S390_ESCA_CPU_SLOTS;
 }
 
 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
@@ -3919,7 +3810,7 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->eca |= ECA_IB;
 	if (sclp.has_siif)
 		vcpu->arch.sie_block->eca |= ECA_SII;
-	if (sclp.has_sigpif)
+	if (kvm_s390_use_sca_entries())
 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
 	if (test_kvm_facility(vcpu->kvm, 129)) {
 		vcpu->arch.sie_block->eca |= ECA_VX;
@@ -4366,8 +4257,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	int ret = 0;
-
 	vcpu_load(vcpu);
 
 	vcpu->run->s.regs.fpc = fpu->fpc;
@@ -4378,7 +4267,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 
 	vcpu_put(vcpu);
-	return ret;
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -4786,9 +4675,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
 
-	if (need_resched())
-		schedule();
-
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
 		if (rc || guestdbg_exit_pending(vcpu))
@@ -5073,13 +4959,8 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
 	 * The guest_state_{enter,exit}_irqoff() functions inform lockdep and
 	 * tracing that entry to the guest will enable host IRQs, and exit from
 	 * the guest will disable host IRQs.
-	 *
-	 * We must not use lockdep/tracing/RCU in this critical section, so we
-	 * use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
 	 */
-	arch_local_irq_enable();
 	ret = sie64a(scb, gprs, gasce);
-	arch_local_irq_disable();
 
 	guest_state_exit_irqoff();
 
@@ -5098,12 +4979,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	kvm_vcpu_srcu_read_lock(vcpu);
 
-	do {
+	while (true) {
 		rc = vcpu_pre_run(vcpu);
+		kvm_vcpu_srcu_read_unlock(vcpu);
 		if (rc || guestdbg_exit_pending(vcpu))
 			break;
 
-		kvm_vcpu_srcu_read_unlock(vcpu);
 		/*
 		 * As PF_VCPU will be used in fault handler, between
 		 * guest_timing_enter_irqoff and guest_timing_exit_irqoff
@@ -5115,7 +4996,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			       sizeof(sie_page->pv_grregs));
 		}
 
+xfer_to_guest_mode_check:
 		local_irq_disable();
+		xfer_to_guest_mode_prepare();
+		if (xfer_to_guest_mode_work_pending()) {
+			local_irq_enable();
+			rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+			if (rc)
+				break;
+			goto xfer_to_guest_mode_check;
+		}
+
 		guest_timing_enter_irqoff();
 		__disable_cpu_timer_accounting(vcpu);
 
@@ -5145,9 +5036,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		kvm_vcpu_srcu_read_lock(vcpu);
 
 		rc = vcpu_post_run(vcpu, exit_reason);
-	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+		if (rc || guestdbg_exit_pending(vcpu)) {
+			kvm_vcpu_srcu_read_unlock(vcpu);
+			break;
+		}
+	}
 
-	kvm_vcpu_srcu_read_unlock(vcpu);
 	return rc;
 }
 
@@ -5363,6 +5257,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	if (signal_pending(current) && !rc) {
 		kvm_run->exit_reason = KVM_EXIT_INTR;
+		vcpu->stat.signal_exits++;
 		rc = -EINTR;
 	}
 
@@ -5729,8 +5624,8 @@ static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
 	return r;
 }
 
-long kvm_arch_vcpu_async_ioctl(struct file *filp,
-			       unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl,
				  unsigned long arg)
 {
 	struct kvm_vcpu *vcpu = filp->private_data;
 	void __user *argp = (void __user *)arg;
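__vcpu_run() now relies on the generic VIRT_XFER_TO_GUEST_WORK infrastructure: before entering SIE with IRQs disabled it checks for pending deferred work (signals, need_resched, and so on) and handles it first, which is what replaces the open-coded signal_pending()/need_resched() checks removed above. Boiled down, the entry loop follows this pattern (a sketch, not the full function; the helper names are the ones the diff itself uses):

	/* Sketch of the xfer-to-guest work loop shared by __vcpu_run() and
	 * do_vsie_run(); a non-zero rc (e.g. -EINTR) exits to userspace. */
	static int example_enter_guest(struct kvm_vcpu *vcpu)
	{
		int rc;

		for (;;) {
			local_irq_disable();
			xfer_to_guest_mode_prepare();
			if (!xfer_to_guest_mode_work_pending())
				break;		/* enter SIE with IRQs off */
			local_irq_enable();
			rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
			if (rc)
				return rc;	/* pending signal etc.: bail out */
		}
		/* ... guest_timing_enter_irqoff(); sie64a(); ... */
		return 0;
	}

This pairs with the entry.S change above: __sie64a itself now enables interrupts via stosm and disables them again at sie_exit, so kvm_s390_enter_exit_sie() no longer needs the arch_local_irq_*() calls around sie64a().
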
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c44fe0c3a097..65c950760993 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -570,13 +570,6 @@ void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
 
-/* support for Basic/Extended SCA handling */
-static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
-{
-	struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
-
-	return &sca->ipte_control;
-}
 static inline int kvm_s390_use_sca_entries(void)
 {
 	/*
@@ -584,7 +577,7 @@ static inline int kvm_s390_use_sca_entries(void)
 	 * might use the entries. By not setting the entries and keeping them
 	 * invalid, hardware will not access them but intercept.
 	 */
-	return sclp.has_sigpif;
+	return sclp.has_sigpif && sclp.has_esca;
 }
 void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
 				     struct mcck_volatile_info *mcck_info);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 347268f89f2f..b526621d2a1b 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -782,7 +782,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
 		rc = set_validity_icpt(scb_s, 0x0011U);
 	else if ((gpa & PAGE_MASK) !=
-		 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
+		 ((gpa + offsetof(struct bsca_block, cpu[0]) - 1) & PAGE_MASK))
 		rc = set_validity_icpt(scb_s, 0x003bU);
 	if (!rc) {
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
@@ -1180,12 +1180,23 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	current->thread.gmap_int_code = 0;
 	barrier();
 	if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
+xfer_to_guest_mode_check:
 		local_irq_disable();
+		xfer_to_guest_mode_prepare();
+		if (xfer_to_guest_mode_work_pending()) {
+			local_irq_enable();
+			rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+			if (rc)
+				goto skip_sie;
+			goto xfer_to_guest_mode_check;
+		}
 		guest_timing_enter_irqoff();
 		rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs,
 					     vsie_page->gmap->asce);
 		guest_timing_exit_irqoff();
 		local_irq_enable();
 	}
+
+skip_sie:
 	barrier();
 	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
 
@@ -1345,13 +1356,11 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * but rewind the PSW to re-enter SIE once that's completed
 		 * instead of passing a "no action" intercept to the guest.
 		 */
-		if (signal_pending(current) ||
-		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
+		if (kvm_s390_vcpu_has_irq(vcpu, 0) ||
 		    kvm_s390_vcpu_sie_inhibited(vcpu)) {
 			kvm_s390_rewind_psw(vcpu, 4);
 			break;
 		}
-		cond_resched();
 	}
 
 	if (rc == -EFAULT) {
@@ -1483,8 +1492,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
 	if (unlikely(scb_addr & 0x1ffUL))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
-	    kvm_s390_vcpu_sie_inhibited(vcpu)) {
+	if (kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) {
 		kvm_s390_rewind_psw(vcpu, 4);
 		return 0;
 	}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 603d9e5febb5..dd85bcca817d 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -596,8 +596,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 				 | _SEGMENT_ENTRY_GMAP_UC
 				 | _SEGMENT_ENTRY;
 		} else
-			*table = pmd_val(*pmd) &
-				_SEGMENT_ENTRY_HARDWARE_BITS;
+			*table = (pmd_val(*pmd) &
+				  _SEGMENT_ENTRY_HARDWARE_BITS)
+				 | _SEGMENT_ENTRY;
 	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
 		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index d4c3c36855e2..d41b19925a5a 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -11,27 +11,27 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/swap.h>
-#include <linux/swapops.h>
+#include <linux/leafops.h>
 #include <linux/pagewalk.h>
 #include <linux/ksm.h>
 #include <asm/gmap_helpers.h>
 #include <asm/pgtable.h>
 
 /**
- * ptep_zap_swap_entry() - discard a swap entry.
+ * ptep_zap_softleaf_entry() - discard a software leaf entry.
  * @mm: the mm
- * @entry: the swap entry that needs to be zapped
+ * @entry: the software leaf entry that needs to be zapped
  *
- * Discards the given swap entry. If the swap entry was an actual swap
- * entry (and not a migration entry, for example), the actual swapped
+ * Discards the given software leaf entry. If the leaf entry was an actual
+ * swap entry (and not a migration entry, for example), the actual swapped
  * page is also discarded from swap.
  */
-static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
 {
-	if (!non_swap_entry(entry))
+	if (softleaf_is_swap(entry))
 		dec_mm_counter(mm, MM_SWAPENTS);
-	else if (is_migration_entry(entry))
-		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+	else if (softleaf_is_migration(entry))
+		dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
 	free_swap_and_cache(entry);
 }
 
@@ -47,6 +47,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
 void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
 {
 	struct vm_area_struct *vma;
+	unsigned long pgstev;
 	spinlock_t *ptl;
 	pgste_t pgste;
 	pte_t *ptep;
@@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
 	if (pte_swap(*ptep)) {
 		preempt_disable();
 		pgste = pgste_get_lock(ptep);
+		pgstev = pgste_val(pgste);
 
-		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
-		pte_clear(mm, vmaddr, ptep);
+		if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+		    (pgstev & _PGSTE_GPS_ZERO)) {
+			ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
+			pte_clear(mm, vmaddr, ptep);
+		}
 
 		pgste_set_unlock(ptep, pgste);
 		preempt_enable();
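Both gmap files are converted from the swp_entry_t API of <linux/swapops.h> to the software-leaf API of <linux/leafops.h>. The mapping is mechanical; a compact sketch using only the helpers that appear in the diff itself:

	/* Sketch: old swapops.h idioms and their leafops.h replacements. */
	static void example_zap(struct mm_struct *mm, pte_t pte)
	{
		softleaf_t entry = softleaf_from_pte(pte);	/* was pte_to_swp_entry() */

		if (softleaf_is_swap(entry))			/* was !non_swap_entry() */
			dec_mm_counter(mm, MM_SWAPENTS);
		else if (softleaf_is_migration(entry))		/* was is_migration_entry() */
			dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
		free_swap_and_cache(entry);
	}

Note that gmap_helper_zap_one_page() also gains a behavioral change on top of the conversion: it now only zaps the entry when the PGSTE marks the page as unused or zero, mirroring the condition ptep_zap_unused() already applies in mm/pgtable.c below.
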
If the swap entry was an actual swap - * entry (and not a migration entry, for example), the actual swapped + * Discards the given software leaf entry. If the leaf entry was an actual + * swap entry (and not a migration entry, for example), the actual swapped * page is also discarded from swap. */ -static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) { - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) dec_mm_counter(mm, MM_SWAPENTS); - else if (is_migration_entry(entry)) - dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + else if (softleaf_is_migration(entry)) + dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry))); free_swap_and_cache(entry); } @@ -47,6 +47,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) { struct vm_area_struct *vma; + unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; @@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) if (pte_swap(*ptep)) { preempt_disable(); pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); - ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); - pte_clear(mm, vmaddr, ptep); + if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || + (pgstev & _PGSTE_GPS_ZERO)) { + ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); + pte_clear(mm, vmaddr, ptep); + } pgste_set_unlock(ptep, pgste); preempt_enable(); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 3042647c9dbf..d3ce04a4b248 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -204,7 +204,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, return rc; } -static int split_pud_page(pud_t *pudp, unsigned long addr) +int split_pud_page(pud_t *pudp, unsigned long addr) { unsigned long pmd_addr, prot; pmd_t *pm_dir, *pmdp; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7ae77df276b5..666adcd681ab 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -16,7 +16,7 @@ #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/slab.h> -#include <linux/swapops.h> +#include <linux/leafops.h> #include <linux/sysctl.h> #include <linux/ksm.h> #include <linux/mman.h> @@ -673,12 +673,12 @@ void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) pgste_set_unlock(ptep, pgste); } -static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) { - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) dec_mm_counter(mm, MM_SWAPENTS); - else if (is_migration_entry(entry)) { - struct folio *folio = pfn_swap_entry_folio(entry); + else if (softleaf_is_migration(entry)) { + struct folio *folio = softleaf_to_folio(entry); dec_mm_counter(mm, mm_counter(folio)); } @@ -700,7 +700,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { - ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); + ptep_zap_softleaf_entry(mm, softleaf_from_pte(pte)); pte_clear(mm, addr, ptep); } if (reset) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d96587b84e81..eeadff45e0e1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -330,10 +330,14 @@ static int modify_pud_table(p4d_t 
*p4d, unsigned long addr, unsigned long end, if (pud_leaf(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { + if (!direct) + vmem_free_pages(pud_deref(*pud), get_order(PUD_SIZE), altmap); pud_clear(pud); pages++; + continue; + } else { + split_pud_page(pud, addr & PUD_MASK); } - continue; } } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && @@ -433,9 +437,15 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) return -EINVAL; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + /* Don't mess with any tables not fully in 1:1 mapping, vmemmap & kasan area */ +#ifdef CONFIG_KASAN + if (WARN_ON_ONCE(!(start >= KASAN_SHADOW_START && end <= KASAN_SHADOW_END) && + end > __abs_lowcore)) + return -EINVAL; +#else if (WARN_ON_ONCE(end > __abs_lowcore)) return -EINVAL; +#endif for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 3238c178bed8..579461d471bb 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2412,8 +2412,9 @@ bool bpf_jit_supports_far_kfunc_call(void) return true; } -int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, - void *old_addr, void *new_addr) +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, + enum bpf_text_poke_type new_t, void *old_addr, + void *new_addr) { struct bpf_plt expected_plt, current_plt, new_plt, *plt; struct { @@ -2430,7 +2431,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0))) return -EINVAL; - if (t == BPF_MOD_JUMP && + if ((new_t == BPF_MOD_JUMP || old_t == BPF_MOD_JUMP) && insn.disp == ((char *)new_addr - (char *)ip) >> 1) { /* * The branch already points to the destination, diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 93d2c9c780fc..5a6ace9d875a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -708,6 +708,12 @@ int zpci_reenable_device(struct zpci_dev *zdev) if (rc) return rc; + if (zdev->msi_nr_irqs > 0) { + rc = zpci_set_irq(zdev); + if (rc) + return rc; + } + rc = zpci_iommu_register_ioat(zdev, &status); if (rc) zpci_disable_device(zdev); diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 72adc8f6e94f..66c4bd888b29 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/seq_file.h> +#include <linux/irqdomain.h> #include <linux/jump_label.h> #include <linux/pci.h> #include <linux/printk.h> @@ -198,19 +199,27 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s zbus->multifunction = zpci_bus_is_multifunction_root(fr); zbus->max_bus_speed = fr->max_bus_speed; + if (zpci_create_parent_msi_domain(zbus)) + goto out_free_domain; + /* * Note that the zbus->resources are taken over and zbus->resources * is empty after a successful call */ bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources); - if (!bus) { - zpci_free_domain(zbus->domain_nr); - return -EFAULT; - } + if (!bus) + goto out_remove_msi_domain; zbus->bus = bus; + dev_set_msi_domain(&zbus->bus->dev, zbus->msi_parent_domain); return 0; + +out_remove_msi_domain: + zpci_remove_parent_msi_domain(zbus); +out_free_domain: + zpci_free_domain(zbus->domain_nr); + return -ENOMEM; } static void zpci_bus_release(struct kref *kref) @@ -231,6 +240,7 @@ static 
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 2a06df8c2498..e9dd45f3c09d 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -6,6 +6,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/pci.h>
 #include <linux/msi.h>
+#include <linux/irqchip/irq-msi-lib.h>
 #include <linux/smp.h>
 
 #include <asm/isc.h>
@@ -97,7 +98,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
 }
 
 /* Register adapter interruptions */
-static int zpci_set_irq(struct zpci_dev *zdev)
+int zpci_set_irq(struct zpci_dev *zdev)
 {
 	int rc;
 
@@ -125,27 +126,53 @@ static int zpci_clear_irq(struct zpci_dev *zdev)
 static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
 				 bool force)
 {
-	struct msi_desc *entry = irq_data_get_msi_desc(data);
-	struct msi_msg msg = entry->msg;
-	int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
+	irq_data_update_affinity(data, dest);
 
-	msg.address_lo &= 0xff0000ff;
-	msg.address_lo |= (cpu_addr << 8);
-	pci_write_msi_msg(data->irq, &msg);
+	return IRQ_SET_MASK_OK;
+}
 
-	return IRQ_SET_MASK_OK;
+/*
+ * Encode the hwirq number for the parent domain. The encoding must be unique
+ * for each IRQ of each device in the parent domain, so it uses the devfn to
+ * identify the device and the msi_index to identify the IRQ within that device.
+ */
+static inline u32 zpci_encode_hwirq(u8 devfn, u16 msi_index)
+{
+	return (devfn << 16) | msi_index;
+}
+
+static inline u16 zpci_decode_hwirq_msi_index(irq_hw_number_t hwirq)
+{
+	return hwirq & 0xffff;
+}
+
+static void zpci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+	struct zpci_dev *zdev = to_zpci_dev(desc->dev);
+
+	if (irq_delivery == DIRECTED) {
+		int cpu = cpumask_first(irq_data_get_affinity_mask(data));
+
+		msg->address_lo = zdev->msi_addr & 0xff0000ff;
+		msg->address_lo |= (smp_cpu_get_cpu_address(cpu) << 8);
+	} else {
+		msg->address_lo = zdev->msi_addr & 0xffffffff;
+	}
+	msg->address_hi = zdev->msi_addr >> 32;
+	msg->data = zpci_decode_hwirq_msi_index(data->hwirq);
 }
 
 static struct irq_chip zpci_irq_chip = {
 	.name = "PCI-MSI",
-	.irq_unmask = pci_msi_unmask_irq,
-	.irq_mask = pci_msi_mask_irq,
+	.irq_compose_msi_msg = zpci_compose_msi_msg,
 };
 
 static void zpci_handle_cpu_local_irq(bool rescan)
 {
 	struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
 	union zpci_sic_iib iib = {{0}};
+	struct irq_domain *msi_domain;
+	irq_hw_number_t hwirq;
 	unsigned long bit;
 	int irqs_on = 0;
 
@@ -163,7 +190,9 @@ static void zpci_handle_cpu_local_irq(bool rescan)
 			continue;
 		}
 		inc_irq_stat(IRQIO_MSI);
-		generic_handle_irq(airq_iv_get_data(dibv, bit));
+		hwirq = airq_iv_get_data(dibv, bit);
+		msi_domain = (struct irq_domain *)airq_iv_get_ptr(dibv, bit);
+		generic_handle_domain_irq(msi_domain, hwirq);
 	}
 }
 
@@ -228,6 +257,8 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
 				      struct tpi_info *tpi_info)
 {
 	union zpci_sic_iib iib = {{0}};
+	struct irq_domain *msi_domain;
+	irq_hw_number_t hwirq;
 	unsigned long si, ai;
 	struct airq_iv *aibv;
 	int irqs_on = 0;
@@ -255,7 +286,9 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
 				break;
 			inc_irq_stat(IRQIO_MSI);
 			airq_iv_lock(aibv, ai);
-			generic_handle_irq(airq_iv_get_data(aibv, ai));
+			hwirq = airq_iv_get_data(aibv, ai);
+			msi_domain = (struct irq_domain *)airq_iv_get_ptr(aibv, ai);
+			generic_handle_domain_irq(msi_domain, hwirq);
 			airq_iv_unlock(aibv, ai);
 		}
 	}
@@ -277,7 +310,9 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
 	zdev->aisb = *bit;
 
 	/* Create adapter interrupt vector */
-	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
+	zdev->aibv = airq_iv_create(msi_vecs,
+				    AIRQ_IV_PTR | AIRQ_IV_DATA | AIRQ_IV_BITLOCK,
+				    NULL);
 	if (!zdev->aibv)
 		return -ENOMEM;
 
@@ -289,146 +324,220 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
 	return 0;
 }
 
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+bool arch_restore_msi_irqs(struct pci_dev *pdev)
 {
-	unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
 	struct zpci_dev *zdev = to_zpci(pdev);
-	struct msi_desc *msi;
-	struct msi_msg msg;
-	unsigned long bit;
-	int cpu_addr;
-	int rc, irq;
 
+	zpci_set_irq(zdev);
+	return true;
+}
+
+static struct airq_struct zpci_airq = {
+	.handler = zpci_floating_irq_handler,
+	.isc = PCI_ISC,
+};
+
+static void zpci_msi_teardown_directed(struct zpci_dev *zdev)
+{
+	airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->max_msi);
+	zdev->msi_first_bit = -1U;
+	zdev->msi_nr_irqs = 0;
+}
+
+static void zpci_msi_teardown_floating(struct zpci_dev *zdev)
+{
+	airq_iv_release(zdev->aibv);
+	zdev->aibv = NULL;
+	airq_iv_free_bit(zpci_sbv, zdev->aisb);
+	zdev->aisb = -1UL;
+	zdev->msi_first_bit = -1U;
+	zdev->msi_nr_irqs = 0;
+}
+
+static void zpci_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *arg)
+{
+	struct zpci_dev *zdev = to_zpci_dev(domain->dev);
+
+	zpci_clear_irq(zdev);
+	if (irq_delivery == DIRECTED)
+		zpci_msi_teardown_directed(zdev);
+	else
+		zpci_msi_teardown_floating(zdev);
+}
+
+static int zpci_msi_prepare(struct irq_domain *domain,
+			    struct device *dev, int nvec,
+			    msi_alloc_info_t *info)
+{
+	struct zpci_dev *zdev = to_zpci_dev(dev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	unsigned long bit;
+	int msi_vecs, rc;
 
 	msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
 	if (msi_vecs < nvec) {
-		pr_info("%s requested %d irqs, allocate system limit of %d",
+		pr_info("%s requested %d IRQs, allocate system limit of %d\n",
 			pci_name(pdev), nvec, zdev->max_msi);
 	}
 
 	rc = __alloc_airq(zdev, msi_vecs, &bit);
-	if (rc < 0)
+	if (rc) {
+		pr_err("Allocating adapter IRQs for %s failed\n", pci_name(pdev));
 		return rc;
+	}
 
-	/*
-	 * Request MSI interrupts:
-	 * When using MSI, nvec_used interrupt sources and their irq
-	 * descriptors are controlled through one msi descriptor.
-	 * Thus the outer loop over msi descriptors shall run only once,
-	 * while two inner loops iterate over the interrupt vectors.
-	 * When using MSI-X, each interrupt vector/irq descriptor
-	 * is bound to exactly one msi descriptor (nvec_used is one).
-	 * So the inner loops are executed once, while the outer iterates
-	 * over the MSI-X descriptors.
-	 */
-	hwirq = bit;
-	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
-		if (hwirq - bit >= msi_vecs)
-			break;
-		irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
-		irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
-					(irq_delivery == DIRECTED) ?
-					msi->affinity : NULL);
-		if (irq < 0)
-			return -ENOMEM;
+	zdev->msi_first_bit = bit;
+	zdev->msi_nr_irqs = msi_vecs;
 
-		for (i = 0; i < irqs_per_msi; i++) {
-			rc = irq_set_msi_desc_off(irq, i, msi);
-			if (rc)
-				return rc;
-			irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
-						 handle_percpu_irq);
-		}
+	rc = zpci_set_irq(zdev);
+	if (rc) {
+		pr_err("Registering adapter IRQs for %s failed\n",
+		       pci_name(pdev));
+		if (irq_delivery == DIRECTED)
+			zpci_msi_teardown_directed(zdev);
+		else
+			zpci_msi_teardown_floating(zdev);
+		return rc;
+	}
+	return 0;
+}
 
-		msg.data = hwirq - bit;
-		if (irq_delivery == DIRECTED) {
-			if (msi->affinity)
-				cpu = cpumask_first(&msi->affinity->mask);
-			else
-				cpu = 0;
-			cpu_addr = smp_cpu_get_cpu_address(cpu);
+static int zpci_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs, void *args)
+{
+	struct msi_desc *desc = ((msi_alloc_info_t *)args)->desc;
+	struct zpci_dev *zdev = to_zpci_dev(desc->dev);
+	struct zpci_bus *zbus = zdev->zbus;
+	unsigned int cpu, hwirq;
+	unsigned long bit;
+	int i;
 
-			msg.address_lo = zdev->msi_addr & 0xff0000ff;
-			msg.address_lo |= (cpu_addr << 8);
+	bit = zdev->msi_first_bit + desc->msi_index;
+	hwirq = zpci_encode_hwirq(zdev->devfn, desc->msi_index);
 
-			for_each_possible_cpu(cpu) {
-				for (i = 0; i < irqs_per_msi; i++)
-					airq_iv_set_data(zpci_ibv[cpu],
-							 hwirq + i, irq + i);
+	if (desc->msi_index + nr_irqs > zdev->max_msi)
+		return -EINVAL;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &zpci_irq_chip, zdev,
+				    handle_percpu_irq, NULL, NULL);
+		if (irq_delivery == DIRECTED) {
+			for_each_possible_cpu(cpu) {
+				airq_iv_set_ptr(zpci_ibv[cpu], bit + i,
+						(unsigned long)zbus->msi_parent_domain);
+				airq_iv_set_data(zpci_ibv[cpu], bit + i, hwirq + i);
 			}
 		} else {
-			msg.address_lo = zdev->msi_addr & 0xffffffff;
-			for (i = 0; i < irqs_per_msi; i++)
-				airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
+			airq_iv_set_ptr(zdev->aibv, bit + i,
+					(unsigned long)zbus->msi_parent_domain);
+			airq_iv_set_data(zdev->aibv, bit + i, hwirq + i);
 		}
-		msg.address_hi = zdev->msi_addr >> 32;
-		pci_write_msi_msg(irq, &msg);
-		hwirq += irqs_per_msi;
 	}
 
-	zdev->msi_first_bit = bit;
-	zdev->msi_nr_irqs = hwirq - bit;
-
-	rc = zpci_set_irq(zdev);
-	if (rc)
-		return rc;
-
-	return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
+	return 0;
 }
 
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
+static void zpci_msi_clear_airq(struct irq_data *d, int i)
 {
-	struct zpci_dev *zdev = to_zpci(pdev);
-	struct msi_desc *msi;
-	unsigned int i;
-	int rc;
+	struct msi_desc *desc = irq_data_get_msi_desc(d);
+	struct zpci_dev *zdev = to_zpci_dev(desc->dev);
+	unsigned long bit;
+	unsigned int cpu;
+	u16 msi_index;
 
-	/* Disable interrupts */
-	rc = zpci_clear_irq(zdev);
-	if (rc)
-		return;
+	msi_index = zpci_decode_hwirq_msi_index(d->hwirq);
+	bit = zdev->msi_first_bit + msi_index;
 
-	/* Release MSI interrupts */
-	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
-		for (i = 0; i < msi->nvec_used; i++) {
-			irq_set_msi_desc(msi->irq + i, NULL);
-			irq_free_desc(msi->irq + i);
+	if (irq_delivery == DIRECTED) {
+		for_each_possible_cpu(cpu) {
+			airq_iv_set_ptr(zpci_ibv[cpu], bit + i, 0);
+			airq_iv_set_data(zpci_ibv[cpu], bit + i, 0);
 		}
-		msi->msg.address_lo = 0;
-		msi->msg.address_hi = 0;
-		msi->msg.data = 0;
-		msi->irq = 0;
+	} else {
+		airq_iv_set_ptr(zdev->aibv, bit + i, 0);
+		airq_iv_set_data(zdev->aibv, bit + i, 0);
 	}
+}
 
-	if (zdev->aisb != -1UL) {
-		zpci_ibv[zdev->aisb] = NULL;
-		airq_iv_free_bit(zpci_sbv, zdev->aisb);
-		zdev->aisb = -1UL;
-	}
-	if (zdev->aibv) {
-		airq_iv_release(zdev->aibv);
-		zdev->aibv = NULL;
-	}
+static void zpci_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs)
+{
+	struct irq_data *d;
+	int i;
 
-	if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U)
-		airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
+	for (i = 0; i < nr_irqs; i++) {
+		d = irq_domain_get_irq_data(domain, virq + i);
+		zpci_msi_clear_airq(d, i);
+		irq_domain_reset_irq_data(d);
+	}
 }
 
-bool arch_restore_msi_irqs(struct pci_dev *pdev)
+static const struct irq_domain_ops zpci_msi_domain_ops = {
+	.alloc	= zpci_msi_domain_alloc,
+	.free	= zpci_msi_domain_free,
+};
+
+static bool zpci_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+				   struct irq_domain *real_parent,
+				   struct msi_domain_info *info)
 {
-	struct zpci_dev *zdev = to_zpci(pdev);
+	if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+		return false;
+
+	info->ops->msi_prepare = zpci_msi_prepare;
+	info->ops->msi_teardown = zpci_msi_teardown;
 
-	zpci_set_irq(zdev);
 	return true;
 }
 
-static struct airq_struct zpci_airq = {
-	.handler = zpci_floating_irq_handler,
-	.isc = PCI_ISC,
+static struct msi_parent_ops zpci_msi_parent_ops = {
	.supported_flags	= MSI_GENERIC_FLAGS_MASK |
				  MSI_FLAG_PCI_MSIX |
				  MSI_FLAG_MULTI_PCI_MSI,
	.required_flags		= MSI_FLAG_USE_DEF_DOM_OPS |
				  MSI_FLAG_USE_DEF_CHIP_OPS,
	.init_dev_msi_info	= zpci_init_dev_msi_info,
 };
 
+int zpci_create_parent_msi_domain(struct zpci_bus *zbus)
+{
+	char fwnode_name[18];
+
+	snprintf(fwnode_name, sizeof(fwnode_name), "ZPCI_MSI_DOM_%04x", zbus->domain_nr);
+	struct irq_domain_info info = {
+		.fwnode	= irq_domain_alloc_named_fwnode(fwnode_name),
+		.ops	= &zpci_msi_domain_ops,
+	};
+
+	if (!info.fwnode) {
+		pr_err("Failed to allocate fwnode for MSI IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	if (irq_delivery == FLOATING)
+		zpci_msi_parent_ops.required_flags |= MSI_FLAG_NO_AFFINITY;
+
+	zbus->msi_parent_domain = msi_create_parent_irq_domain(&info, &zpci_msi_parent_ops);
+	if (!zbus->msi_parent_domain) {
+		irq_domain_free_fwnode(info.fwnode);
+		pr_err("Failed to create MSI IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void zpci_remove_parent_msi_domain(struct zpci_bus *zbus)
+{
+	struct fwnode_handle *fn;
+
+	fn = zbus->msi_parent_domain->fwnode;
+	irq_domain_remove(zbus->msi_parent_domain);
+	irq_domain_free_fwnode(fn);
+}
+
 static void __init cpu_enable_directed_irq(void *unused)
 {
 	union zpci_sic_iib iib = {{0}};
@@ -465,6 +574,7 @@ static int __init zpci_directed_irq_init(void)
 	 * is only done on the first vector.
 	 */
	zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
+				       AIRQ_IV_PTR | AIRQ_IV_DATA |
				       AIRQ_IV_CACHELINE |
				       (!cpu ? AIRQ_IV_ALLOC : 0), NULL);
