-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 16
-rw-r--r--  arch/powerpc/Kconfig | 10
-rw-r--r--  arch/powerpc/Makefile | 9
-rw-r--r--  arch/powerpc/boot/crt0.S | 33
-rw-r--r--  arch/powerpc/boot/dts/digsy_mtc.dts | 8
-rw-r--r--  arch/powerpc/boot/dts/o2d.dtsi | 6
-rw-r--r--  arch/powerpc/boot/zImage.lds.S | 7
-rw-r--r--  arch/powerpc/configs/microwatt_defconfig | 3
-rw-r--r--  arch/powerpc/configs/ppc64_defconfig | 1
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 1
-rw-r--r--  arch/powerpc/crypto/md5-asm.S | 10
-rw-r--r--  arch/powerpc/crypto/sha1-powerpc-asm.S | 6
-rw-r--r--  arch/powerpc/include/asm/asm-prototypes.h | 5
-rw-r--r--  arch/powerpc/include/asm/atomic.h | 151
-rw-r--r--  arch/powerpc/include/asm/bitops.h | 89
-rw-r--r--  arch/powerpc/include/asm/book3s/32/kup.h | 108
-rw-r--r--  arch/powerpc/include/asm/book3s/32/mmu-hash.h | 82
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 4
-rw-r--r--  arch/powerpc/include/asm/book3s/64/kup.h | 56
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 8
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu.h | 38
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 6
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush.h | 4
-rw-r--r--  arch/powerpc/include/asm/book3s/pgtable.h | 4
-rw-r--r--  arch/powerpc/include/asm/btext.h | 10
-rw-r--r--  arch/powerpc/include/asm/code-patching.h | 40
-rw-r--r--  arch/powerpc/include/asm/cpm2.h | 6
-rw-r--r--  arch/powerpc/include/asm/cpuidle.h | 2
-rw-r--r--  arch/powerpc/include/asm/cputhreads.h | 33
-rw-r--r--  arch/powerpc/include/asm/eeh.h | 2
-rw-r--r--  arch/powerpc/include/asm/exception-64e.h | 4
-rw-r--r--  arch/powerpc/include/asm/fadump-internal.h | 6
-rw-r--r--  arch/powerpc/include/asm/firmware.h | 8
-rw-r--r--  arch/powerpc/include/asm/floppy.h | 8
-rw-r--r--  arch/powerpc/include/asm/head-64.h | 12
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 2
-rw-r--r--  arch/powerpc/include/asm/hw_breakpoint.h | 5
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h | 107
-rw-r--r--  arch/powerpc/include/asm/i8259.h | 2
-rw-r--r--  arch/powerpc/include/asm/inst.h | 95
-rw-r--r--  arch/powerpc/include/asm/interrupt.h | 73
-rw-r--r--  arch/powerpc/include/asm/iommu.h | 2
-rw-r--r--  arch/powerpc/include/asm/ipic.h | 2
-rw-r--r--  arch/powerpc/include/asm/irq.h | 2
-rw-r--r--  arch/powerpc/include/asm/kexec.h | 2
-rw-r--r--  arch/powerpc/include/asm/kup.h | 122
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 1
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 6
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 5
-rw-r--r--  arch/powerpc/include/asm/kvm_guest.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 7
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 4
-rw-r--r--  arch/powerpc/include/asm/machdep.h | 2
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 16
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 4
-rw-r--r--  arch/powerpc/include/asm/mpic.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/32/kup-8xx.h | 50
-rw-r--r--  arch/powerpc/include/asm/nohash/32/mmu-44x.h | 1
-rw-r--r--  arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 6
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 6
-rw-r--r--  arch/powerpc/include/asm/nohash/kup-booke.h | 110
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 1
-rw-r--r--  arch/powerpc/include/asm/opal.h | 2
-rw-r--r--  arch/powerpc/include/asm/paca.h | 8
-rw-r--r--  arch/powerpc/include/asm/pci.h | 2
-rw-r--r--  arch/powerpc/include/asm/perf_event_server.h | 2
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 6
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 43
-rw-r--r--  arch/powerpc/include/asm/processor.h | 18
-rw-r--r--  arch/powerpc/include/asm/ptrace.h | 2
-rw-r--r--  arch/powerpc/include/asm/reg.h | 16
-rw-r--r--  arch/powerpc/include/asm/rtas.h | 2
-rw-r--r--  arch/powerpc/include/asm/sections.h | 16
-rw-r--r--  arch/powerpc/include/asm/setup.h | 9
-rw-r--r--  arch/powerpc/include/asm/smu.h | 2
-rw-r--r--  arch/powerpc/include/asm/sstep.h | 4
-rw-r--r--  arch/powerpc/include/asm/switch_to.h | 3
-rw-r--r--  arch/powerpc/include/asm/task_size_64.h | 6
-rw-r--r--  arch/powerpc/include/asm/time.h | 19
-rw-r--r--  arch/powerpc/include/asm/udbg.h | 10
-rw-r--r--  arch/powerpc/include/asm/uprobes.h | 1
-rw-r--r--  arch/powerpc/include/asm/xics.h | 4
-rw-r--r--  arch/powerpc/include/asm/xmon.h | 2
-rw-r--r--  arch/powerpc/kernel/Makefile | 1
-rw-r--r--  arch/powerpc/kernel/align.c | 4
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 5
-rw-r--r--  arch/powerpc/kernel/btext.c | 16
-rw-r--r--  arch/powerpc/kernel/cacheinfo.c | 5
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.c | 12
-rw-r--r--  arch/powerpc/kernel/dbell.c | 3
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 24
-rw-r--r--  arch/powerpc/kernel/eeh_cache.c | 2
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 162
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 54
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 4
-rw-r--r--  arch/powerpc/kernel/epapr_paravirt.c | 2
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 14
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 174
-rw-r--r--  arch/powerpc/kernel/fadump.c | 26
-rw-r--r--  arch/powerpc/kernel/fpu.S | 5
-rw-r--r--  arch/powerpc/kernel/head_32.h | 9
-rw-r--r--  arch/powerpc/kernel/head_40x.S | 17
-rw-r--r--  arch/powerpc/kernel/head_44x.S | 26
-rw-r--r--  arch/powerpc/kernel/head_64.S | 20
-rw-r--r--  arch/powerpc/kernel/head_book3s_32.S | 4
-rw-r--r--  arch/powerpc/kernel/head_booke.h | 3
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S | 13
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 4
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint_constraints.c | 4
-rw-r--r--  arch/powerpc/kernel/idle.c | 2
-rw-r--r--  arch/powerpc/kernel/idle_6xx.S | 2
-rw-r--r--  arch/powerpc/kernel/interrupt.c | 3
-rw-r--r--  arch/powerpc/kernel/interrupt_64.S | 46
-rw-r--r--  arch/powerpc/kernel/irq.c | 5
-rw-r--r--  arch/powerpc/kernel/kgdb.c | 4
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 4
-rw-r--r--  arch/powerpc/kernel/l2cr_6xx.S | 6
-rw-r--r--  arch/powerpc/kernel/mce.c | 2
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 18
-rw-r--r--  arch/powerpc/kernel/module.c | 11
-rw-r--r--  arch/powerpc/kernel/module_32.c | 33
-rw-r--r--  arch/powerpc/kernel/nvram_64.c | 6
-rw-r--r--  arch/powerpc/kernel/optprobes.c | 12
-rw-r--r--  arch/powerpc/kernel/optprobes_head.S | 4
-rw-r--r--  arch/powerpc/kernel/paca.c | 18
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 2
-rw-r--r--  arch/powerpc/kernel/pci_32.c | 4
-rw-r--r--  arch/powerpc/kernel/process.c | 58
-rw-r--r--  arch/powerpc/kernel/prom.c | 6
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 14
-rw-r--r--  arch/powerpc/kernel/rtas.c | 104
-rw-r--r--  arch/powerpc/kernel/rtasd.c | 6
-rw-r--r--  arch/powerpc/kernel/security.c | 4
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 2
-rw-r--r--  arch/powerpc/kernel/setup.h | 2
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 4
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 23
-rw-r--r--  arch/powerpc/kernel/signal_32.c | 14
-rw-r--r--  arch/powerpc/kernel/smp.c | 47
-rw-r--r--  arch/powerpc/kernel/swsusp_32.S | 2
-rw-r--r--  arch/powerpc/kernel/swsusp_asm64.S | 2
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 10
-rw-r--r--  arch/powerpc/kernel/time.c | 87
-rw-r--r--  arch/powerpc/kernel/tm.S | 15
-rw-r--r--  arch/powerpc/kernel/trace/ftrace.c | 107
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_32.S | 118
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 15
-rw-r--r--  arch/powerpc/kernel/udbg_16550.c | 10
-rw-r--r--  arch/powerpc/kernel/vecemu.c | 2
-rw-r--r--  arch/powerpc/kernel/vector.S | 10
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 16
-rw-r--r--  arch/powerpc/kernel/watchdog.c | 223
-rw-r--r--  arch/powerpc/kexec/core.c | 2
-rw-r--r--  arch/powerpc/kexec/core_64.c | 4
-rw-r--r--  arch/powerpc/kexec/ranges.c | 2
-rw-r--r--  arch/powerpc/kvm/Kconfig | 16
-rw-r--r--  arch/powerpc/kvm/book3s_64_entry.S | 11
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 860
-rw-r--r--  arch/powerpc/kvm/book3s_hv.h | 42
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 55
-rw-r--r--  arch/powerpc/kvm/book3s_hv_hmi.c | 7
-rw-r--r--  arch/powerpc/kvm/book3s_hv_interrupts.S | 13
-rw-r--r--  arch/powerpc/kvm/book3s_hv_nested.c | 10
-rw-r--r--  arch/powerpc/kvm/book3s_hv_p9_entry.c | 911
-rw-r--r--  arch/powerpc/kvm/book3s_hv_ras.c | 54
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 78
-rw-r--r--  arch/powerpc/lib/Makefile | 7
-rw-r--r--  arch/powerpc/lib/code-patching.c | 497
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 30
-rw-r--r--  arch/powerpc/lib/sstep.c | 4
-rw-r--r--  arch/powerpc/lib/test-code-patching.c | 362
-rw-r--r--  arch/powerpc/lib/test_code-patching.S | 20
-rw-r--r--  arch/powerpc/lib/test_emulate_step.c | 10
-rw-r--r--  arch/powerpc/lib/test_emulate_step_exec_instr.S | 8
-rw-r--r--  arch/powerpc/mm/book3s32/Makefile | 1
-rw-r--r--  arch/powerpc/mm/book3s32/kuap.c | 5
-rw-r--r--  arch/powerpc/mm/book3s32/kuep.c | 20
-rw-r--r--  arch/powerpc/mm/book3s32/mmu.c | 7
-rw-r--r--  arch/powerpc/mm/book3s32/mmu_context.c | 15
-rw-r--r--  arch/powerpc/mm/book3s64/Makefile | 19
-rw-r--r--  arch/powerpc/mm/book3s64/hash_native.c | 108
-rw-r--r--  arch/powerpc/mm/book3s64/hash_pgtable.c | 1
-rw-r--r--  arch/powerpc/mm/book3s64/hash_utils.c | 117
-rw-r--r--  arch/powerpc/mm/book3s64/hugetlbpage.c (renamed from arch/powerpc/mm/book3s64/hash_hugetlbpage.c) | 4
-rw-r--r--  arch/powerpc/mm/book3s64/mmu_context.c | 34
-rw-r--r--  arch/powerpc/mm/book3s64/pgtable.c | 32
-rw-r--r--  arch/powerpc/mm/book3s64/pkeys.c | 2
-rw-r--r--  arch/powerpc/mm/book3s64/radix_pgtable.c | 40
-rw-r--r--  arch/powerpc/mm/book3s64/slb.c | 16
-rw-r--r--  arch/powerpc/mm/book3s64/trace.c | 8
-rw-r--r--  arch/powerpc/mm/copro_fault.c | 2
-rw-r--r--  arch/powerpc/mm/fault.c | 24
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 16
-rw-r--r--  arch/powerpc/mm/init-common.c | 21
-rw-r--r--  arch/powerpc/mm/init_64.c | 59
-rw-r--r--  arch/powerpc/mm/ioremap.c | 20
-rw-r--r--  arch/powerpc/mm/kasan/book3s_32.c | 3
-rw-r--r--  arch/powerpc/mm/maccess.c | 17
-rw-r--r--  arch/powerpc/mm/mem.c | 2
-rw-r--r--  arch/powerpc/mm/mmap.c | 40
-rw-r--r--  arch/powerpc/mm/mmu_context.c | 11
-rw-r--r--  arch/powerpc/mm/nohash/44x.c | 20
-rw-r--r--  arch/powerpc/mm/nohash/8xx.c | 33
-rw-r--r--  arch/powerpc/mm/nohash/Makefile | 2
-rw-r--r--  arch/powerpc/mm/nohash/book3e_pgtable.c | 15
-rw-r--r--  arch/powerpc/mm/nohash/fsl_book3e.c | 10
-rw-r--r--  arch/powerpc/mm/nohash/kup.c | 33
-rw-r--r--  arch/powerpc/mm/nohash/mmu_context.c | 6
-rw-r--r--  arch/powerpc/mm/nohash/tlb.c | 13
-rw-r--r--  arch/powerpc/mm/nohash/tlb_low_64e.S | 40
-rw-r--r--  arch/powerpc/mm/numa.c | 6
-rw-r--r--  arch/powerpc/mm/pgtable.c | 9
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 14
-rw-r--r--  arch/powerpc/mm/ptdump/Makefile | 2
-rw-r--r--  arch/powerpc/mm/ptdump/ptdump.c | 6
-rw-r--r--  arch/powerpc/mm/slice.c | 20
-rw-r--r--  arch/powerpc/net/bpf_jit.h | 17
-rw-r--r--  arch/powerpc/net/bpf_jit_comp.c | 68
-rw-r--r--  arch/powerpc/net/bpf_jit_comp32.c | 101
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 72
-rw-r--r--  arch/powerpc/perf/8xx-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 126
-rw-r--r--  arch/powerpc/perf/generic-compat-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/hv-24x7.c | 2
-rw-r--r--  arch/powerpc/perf/internal.h | 18
-rw-r--r--  arch/powerpc/perf/isa207-common.c | 60
-rw-r--r--  arch/powerpc/perf/power10-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power5+-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power5-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power6-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power7-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power8-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power9-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/ppc970-pmu.c | 2
-rw-r--r--  arch/powerpc/platforms/44x/fsp2.c | 4
-rw-r--r--  arch/powerpc/platforms/4xx/cpm.c | 4
-rw-r--r--  arch/powerpc/platforms/4xx/pci.c | 2
-rw-r--r--  arch/powerpc/platforms/512x/clock-commonclk.c | 52
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x.h | 4
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_shared.c | 4
-rw-r--r--  arch/powerpc/platforms/52xx/Kconfig | 2
-rw-r--r--  arch/powerpc/platforms/83xx/km83xx.c | 2
-rw-r--r--  arch/powerpc/platforms/83xx/mpc834x_mds.c | 2
-rw-r--r--  arch/powerpc/platforms/83xx/mpc837x_mds.c | 2
-rw-r--r--  arch/powerpc/platforms/83xx/mpc837x_rdb.c | 2
-rw-r--r--  arch/powerpc/platforms/83xx/mpc83xx.h | 6
-rw-r--r--  arch/powerpc/platforms/83xx/usb.c | 6
-rw-r--r--  arch/powerpc/platforms/85xx/c293pcie.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/ge_imp3a.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/mpc85xx_cds.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/smp.c | 4
-rw-r--r--  arch/powerpc/platforms/85xx/socrates_fpga_pic.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/socrates_fpga_pic.h | 2
-rw-r--r--  arch/powerpc/platforms/85xx/xes_mpc85xx.c | 4
-rw-r--r--  arch/powerpc/platforms/Kconfig | 4
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 58
-rw-r--r--  arch/powerpc/platforms/cell/Kconfig | 3
-rw-r--r--  arch/powerpc/platforms/cell/cbe_regs.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/iommu.c | 15
-rw-r--r--  arch/powerpc/platforms/cell/pervasive.c | 1
-rw-r--r--  arch/powerpc/platforms/cell/spu_base.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/spu_manage.c | 16
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 2
-rw-r--r--  arch/powerpc/platforms/chrp/Kconfig | 2
-rw-r--r--  arch/powerpc/platforms/chrp/pegasos_eth.c | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/Kconfig | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 5
-rw-r--r--  arch/powerpc/platforms/embedded6xx/hlwd-pic.h | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/holly.c | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c | 4
-rw-r--r--  arch/powerpc/platforms/embedded6xx/wii.c | 2
-rw-r--r--  arch/powerpc/platforms/maple/Kconfig | 3
-rw-r--r--  arch/powerpc/platforms/microwatt/Kconfig | 1
-rw-r--r--  arch/powerpc/platforms/microwatt/rng.c | 2
-rw-r--r--  arch/powerpc/platforms/pasemi/Kconfig | 3
-rw-r--r--  arch/powerpc/platforms/pasemi/msi.c | 2
-rw-r--r--  arch/powerpc/platforms/pasemi/pasemi.h | 2
-rw-r--r--  arch/powerpc/platforms/pasemi/pci.c | 2
-rw-r--r--  arch/powerpc/platforms/pasemi/setup.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/Kconfig | 3
-rw-r--r--  arch/powerpc/platforms/powermac/cache.S | 4
-rw-r--r--  arch/powerpc/platforms/powermac/feature.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/low_i2c.c | 3
-rw-r--r--  arch/powerpc/platforms/powermac/nvram.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/pfunc_base.c | 6
-rw-r--r--  arch/powerpc/platforms/powermac/pic.c | 6
-rw-r--r--  arch/powerpc/platforms/powermac/setup.c | 6
-rw-r--r--  arch/powerpc/platforms/powermac/smp.c | 4
-rw-r--r--  arch/powerpc/platforms/powermac/udbg_scc.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/Kconfig | 2
-rw-r--r--  arch/powerpc/platforms/powernv/idle.c | 27
-rw-r--r--  arch/powerpc/platforms/powernv/opal-core.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/opal-dump.c | 3
-rw-r--r--  arch/powerpc/platforms/powernv/opal-elog.c | 3
-rw-r--r--  arch/powerpc/platforms/powernv/opal-fadump.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-imc.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/opal-lpc.c | 1
-rw-r--r--  arch/powerpc/platforms/powernv/opal-msglog.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/opal-power.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-powercap.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-rtc.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-sensor-groups.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 8
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/powernv.h | 4
-rw-r--r--  arch/powerpc/platforms/powernv/rng.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 18
-rw-r--r--  arch/powerpc/platforms/ps3/gelic_udbg.c | 2
-rw-r--r--  arch/powerpc/platforms/ps3/mm.c | 4
-rw-r--r--  arch/powerpc/platforms/ps3/os-area.c | 4
-rw-r--r--  arch/powerpc/platforms/ps3/platform.h | 14
-rw-r--r--  arch/powerpc/platforms/ps3/repository.c | 20
-rw-r--r--  arch/powerpc/platforms/ps3/smp.c | 2
-rw-r--r--  arch/powerpc/platforms/ps3/spu.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig | 5
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile | 1
-rw-r--r--  arch/powerpc/platforms/pseries/event_sources.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 9
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 71
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c | 5
-rw-r--r--  arch/powerpc/platforms/pseries/mobility.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/pseries.h | 7
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/rtas-fadump.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/scanlog.c | 195
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 10
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c | 13
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c | 6
-rw-r--r--  arch/powerpc/sysdev/cpm2.c | 6
-rw-r--r--  arch/powerpc/sysdev/dart_iommu.c | 2
-rw-r--r--  arch/powerpc/sysdev/fsl_mpic_err.c | 4
-rw-r--r--  arch/powerpc/sysdev/fsl_pci.c | 2
-rw-r--r--  arch/powerpc/sysdev/fsl_pci.h | 2
-rw-r--r--  arch/powerpc/sysdev/i8259.c | 2
-rw-r--r--  arch/powerpc/sysdev/ipic.c | 2
-rw-r--r--  arch/powerpc/sysdev/mpic.c | 5
-rw-r--r--  arch/powerpc/sysdev/mpic.h | 10
-rw-r--r--  arch/powerpc/sysdev/mpic_msi.c | 6
-rw-r--r--  arch/powerpc/sysdev/mpic_timer.c | 6
-rw-r--r--  arch/powerpc/sysdev/mpic_u3msi.c | 2
-rw-r--r--  arch/powerpc/sysdev/tsi108_dev.c | 3
-rw-r--r--  arch/powerpc/sysdev/tsi108_pci.c | 2
-rw-r--r--  arch/powerpc/sysdev/udbg_memcons.c | 2
-rw-r--r--  arch/powerpc/sysdev/xics/icp-hv.c | 2
-rw-r--r--  arch/powerpc/sysdev/xics/icp-opal.c | 2
-rw-r--r--  arch/powerpc/sysdev/xics/xics-common.c | 2
-rw-r--r--  arch/powerpc/sysdev/xive/common.c | 214
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 19
-rw-r--r--  arch/powerpc/sysdev/xive/spapr.c | 47
-rw-r--r--  arch/powerpc/sysdev/xive/xive-internal.h | 2
-rw-r--r--  arch/powerpc/xmon/xmon.c | 38
-rw-r--r--  arch/powerpc/xmon/xmon_bpts.h | 4
-rw-r--r--  drivers/macintosh/mediabay.c | 2
-rw-r--r--  drivers/misc/cxl/Kconfig | 1
-rw-r--r--  drivers/misc/lkdtm/Makefile | 2
-rw-r--r--  drivers/misc/lkdtm/core.c | 2
-rw-r--r--  drivers/misc/ocxl/file.c | 4
-rw-r--r--  include/linux/cuda.h | 2
-rw-r--r--  include/linux/pmu.h | 2
-rw-r--r--  include/uapi/linux/perf_event.h | 5
-rwxr-xr-x  tools/testing/selftests/powerpc/security/mitigation-patching.sh | 5
-rw-r--r--  tools/testing/selftests/powerpc/security/spectre_v2.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/signal/.gitignore | 2
-rw-r--r--  tools/testing/selftests/powerpc/signal/Makefile | 2
-rw-r--r--  tools/testing/selftests/powerpc/signal/sigreturn_kernel.c | 132
-rw-r--r--  tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c | 43
370 files changed, 5618 insertions, 3431 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4955d2faf4cc..f5a27f067db9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3393,7 +3393,7 @@
Disable SMAP (Supervisor Mode Access Prevention)
even if it is supported by processor.
- nosmep [X86,PPC]
+ nosmep [X86,PPC64s]
Disable SMEP (Supervisor Mode Execution Prevention)
even if it is supported by processor.
@@ -4166,6 +4166,14 @@
Override pmtimer IOPort with a hex value.
e.g. pmtmr=0x508
+ pmu_override= [PPC] Override the PMU.
+ This option takes over the PMU facility, so it is no
+ longer usable by perf. Setting this option starts the
+ PMU counters by setting MMCR0 to 0 (the FC bit is
+ cleared). If a number is given, then MMCR1 is set to
+ that number, otherwise (e.g., 'pmu_override=on'), MMCR1
+ remains 0.
+
pm_debug_messages [SUSPEND,KNL]
Enable suspend/resume debug messages during boot up.
@@ -6494,6 +6502,12 @@
controller on both pseries and powernv
platforms. Only useful on POWER9 and above.
+ xive.store-eoi=off [PPC]
+ By default on POWER10 and above, the kernel will use
+ stores for EOI handling when the XIVE interrupt mode
+ is active. This option allows the XIVE driver to use
+ loads instead, as on POWER9.
+
xhci-hcd.quirks [USB,KNL]
A hex value specifying bitmask with supplemental xhci
host controller quirks. Meaning of each bit can be
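
A minimal illustration of the two new parameters documented above (values are illustrative only, not from the patch; pmu_override= takes either a plain enable such as 'on' or a literal MMCR1 value, and xive.store-eoi=off only matters where XIVE is the active interrupt mode):

	# example kernel command line (illustrative only)
	root=/dev/sda2 pmu_override=on xive.store-eoi=off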
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index dea74d7717c0..0631c9241af3 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -129,7 +129,7 @@ config PPC
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
- select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
+ select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PHYS_TO_DMA
@@ -165,6 +165,7 @@ config PPC
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
+ select CPUMASK_OFFSTACK if NR_CPUS >= 8192
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
select DMA_OPS_BYPASS if PPC64
select DMA_OPS if PPC64
@@ -205,7 +206,7 @@ config PPC
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
select HAVE_FAST_GUP
@@ -229,7 +230,7 @@ config PPC
select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
- select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
select HAVE_OPTPROBES
@@ -845,7 +846,7 @@ config FORCE_MAX_ZONEORDER
config PPC_SUBPAGE_PROT
bool "Support setting protections for 4k subpages (subpage_prot syscall)"
default n
- depends on PPC_BOOK3S_64 && PPC_64K_PAGES
+ depends on PPC_64S_HASH_MMU && PPC_64K_PAGES
help
This option adds support for system call to allow user programs
to set access permissions (read/write, readonly, or no access)
@@ -943,6 +944,7 @@ config PPC_MEM_KEYS
prompt "PowerPC Memory Protection Keys"
def_bool y
depends on PPC_BOOK3S_64
+ depends on PPC_64S_HASH_MMU
select ARCH_USES_HIGH_VMA_FLAGS
select ARCH_HAS_PKEYS
help
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e02568f17334..5f16ac1583c5 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -245,7 +245,9 @@ cpu-as-$(CONFIG_E500) += -Wa,-me500
# When using '-many -mpower4' gas will first try and find a matching power4
# mnemonic and failing that it will allow any valid mnemonic that GAS knows
# about. GCC will pass -many to GAS when assembling, clang does not.
-cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 -Wa,-many
+# LLVM IAS doesn't understand either flag: https://github.com/ClangBuiltLinux/linux/issues/675
+# but LLVM IAS only supports ISA >= 2.06 for Book3S 64 anyway...
+cpu-as-$(CONFIG_PPC_BOOK3S_64) += $(call as-option,-Wa$(comma)-mpower4) $(call as-option,-Wa$(comma)-many)
cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc)
KBUILD_AFLAGS += $(cpu-as-y)
@@ -445,10 +447,11 @@ PHONY += checkbin
# Check toolchain versions:
# - gcc-4.6 is the minimum kernel-wide version so nothing required.
checkbin:
- @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \
- && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \
+ @if test "x${CONFIG_LD_IS_LLD}" != "xy" -a \
+ "x$(call ld-ifversion, -le, 22400, y)" = "xy" ; then \
echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
echo 'in some circumstances.' ; \
+ echo '*** binutils 2.23 do not define the TOC symbol ' ; \
echo -n '*** Please use a different binutils version.' ; \
false ; \
fi
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 1d83966f5ef6..feadee18e271 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -28,7 +28,7 @@ p_etext: .8byte _etext
p_bss_start: .8byte __bss_start
p_end: .8byte _end
-p_toc: .8byte __toc_start + 0x8000 - p_base
+p_toc: .8byte .TOC. - p_base
p_dyn: .8byte __dynamic_start - p_base
p_rela: .8byte __rela_dyn_start - p_base
p_prom: .8byte 0
@@ -226,16 +226,19 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */
#ifdef __powerpc64__
#define PROM_FRAME_SIZE 512
-#define SAVE_GPR(n, base) std n,8*(n)(base)
-#define REST_GPR(n, base) ld n,8*(n)(base)
-#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
-#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
-#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
-#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
-#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base)
-#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base)
-#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
-#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+
+.macro OP_REGS op, width, start, end, base, offset
+ .Lreg=\start
+ .rept (\end - \start + 1)
+ \op .Lreg,\offset+\width*.Lreg(\base)
+ .Lreg=.Lreg+1
+ .endr
+.endm
+
+#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, 0
+#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, 0
+#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
+#define REST_GPR(n, base) REST_GPRS(n, n, base)
/* prom handles the jump into and return from firmware. The prom args pointer
is loaded in r3. */
@@ -246,9 +249,7 @@ prom:
stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
SAVE_GPR(2, r1)
- SAVE_GPR(13, r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_GPRS(13, 31, r1)
mfcr r10
std r10,8*32(r1)
mfmsr r10
@@ -283,9 +284,7 @@ prom:
/* Restore other registers */
REST_GPR(2, r1)
- REST_GPR(13, r1)
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
+ REST_GPRS(13, 31, r1)
ld r10,8*32(r1)
mtcr r10
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index 57024a4c1e7d..dfaf974c0ce6 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -25,14 +25,6 @@
status = "disabled";
};
- spi@f00 {
- msp430@0 {
- compatible = "spidev";
- spi-max-frequency = <32000>;
- reg = <0>;
- };
- };
-
psc@2000 { // PSC1
status = "disabled";
};
diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi
index b55a9e5bd828..7e52509fa506 100644
--- a/arch/powerpc/boot/dts/o2d.dtsi
+++ b/arch/powerpc/boot/dts/o2d.dtsi
@@ -34,12 +34,6 @@
#address-cells = <1>;
#size-cells = <0>;
cell-index = <0>;
-
- spidev@0 {
- compatible = "spidev";
- spi-max-frequency = <250000>;
- reg = <0>;
- };
};
psc@2200 { // PSC2
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index d6f072865627..d65cd55a6f38 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -36,12 +36,9 @@ SECTIONS
}
#ifdef CONFIG_PPC64_BOOT_WRAPPER
- . = ALIGN(256);
- .got :
+ .got : ALIGN(256)
{
- __toc_start = .;
- *(.got)
- *(.toc)
+ *(.got .toc)
}
#endif
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
index 07d87a4044b2..eff933ebbb9e 100644
--- a/arch/powerpc/configs/microwatt_defconfig
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -15,6 +15,8 @@ CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB_MERGE_DEFAULT is not set
CONFIG_PPC64=y
+CONFIG_POWER9_CPU=y
+# CONFIG_PPC_64S_HASH_MMU is not set
# CONFIG_PPC_KUEP is not set
# CONFIG_PPC_KUAP is not set
CONFIG_CPU_LITTLE_ENDIAN=y
@@ -27,7 +29,6 @@ CONFIG_PPC_MICROWATT=y
CONFIG_CPU_FREQ=y
CONFIG_HZ_100=y
CONFIG_PPC_4K_PAGES=y
-# CONFIG_PPC_MEM_KEYS is not set
# CONFIG_SECCOMP is not set
# CONFIG_MQ_IOSCHED_KYBER is not set
# CONFIG_COREDUMP is not set
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 203d0b7f0bb8..c8b0e80d613b 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -26,7 +26,6 @@ CONFIG_PPC64=y
CONFIG_NR_CPUS=2048
CONFIG_PPC_SPLPAR=y
CONFIG_DTL=y
-CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
CONFIG_PPC_SVM=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index e64f2242abe1..b571d084c148 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -38,7 +38,6 @@ CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PPC_SPLPAR=y
CONFIG_DTL=y
-CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
CONFIG_PAPR_SCM=m
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
index 948d100a2934..fa6bc440cf4a 100644
--- a/arch/powerpc/crypto/md5-asm.S
+++ b/arch/powerpc/crypto/md5-asm.S
@@ -38,15 +38,11 @@
#define INITIALIZE \
PPC_STLU r1,-INT_FRAME_SIZE(r1); \
- SAVE_8GPRS(14, r1); /* push registers onto stack */ \
- SAVE_4GPRS(22, r1); \
- SAVE_GPR(26, r1)
+ SAVE_GPRS(14, 26, r1) /* push registers onto stack */
#define FINALIZE \
- REST_8GPRS(14, r1); /* pop registers from stack */ \
- REST_4GPRS(22, r1); \
- REST_GPR(26, r1); \
- addi r1,r1,INT_FRAME_SIZE;
+ REST_GPRS(14, 26, r1); /* pop registers from stack */ \
+ addi r1,r1,INT_FRAME_SIZE
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S
index 23e248beff71..f0d5ed557ab1 100644
--- a/arch/powerpc/crypto/sha1-powerpc-asm.S
+++ b/arch/powerpc/crypto/sha1-powerpc-asm.S
@@ -125,8 +125,7 @@
_GLOBAL(powerpc_sha_transform)
PPC_STLU r1,-INT_FRAME_SIZE(r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_GPRS(14, 31, r1)
/* Load up A - E */
lwz RA(0),0(r3) /* A */
@@ -184,7 +183,6 @@ _GLOBAL(powerpc_sha_transform)
stw RD(0),12(r3)
stw RE(0),16(r3)
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
+ REST_GPRS(14, 31, r1)
addi r1,r1,INT_FRAME_SIZE
blr
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 222823861a67..41b8a1e1144a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-void kvmhv_save_host_pmu(void);
-void kvmhv_load_host_pmu(void);
-void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
-void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-
void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index fd594fdbd84d..853dc86864f4 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -37,62 +37,62 @@ static __inline__ void arch_atomic_set(atomic_t *v, int i)
__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
}
-#define ATOMIC_OP(op, asm_op) \
+#define ATOMIC_OP(op, asm_op, suffix, sign, ...) \
static __inline__ void arch_atomic_##op(int a, atomic_t *v) \
{ \
int t; \
\
__asm__ __volatile__( \
"1: lwarx %0,0,%3 # atomic_" #op "\n" \
- #asm_op " %0,%2,%0\n" \
+ #asm_op "%I2" suffix " %0,%0,%2\n" \
" stwcx. %0,0,%3 \n" \
" bne- 1b\n" \
: "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
- : "cc"); \
+ : "r"#sign (a), "r" (&v->counter) \
+ : "cc", ##__VA_ARGS__); \
} \
-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...) \
static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \
{ \
int t; \
\
__asm__ __volatile__( \
"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
- #asm_op " %0,%2,%0\n" \
+ #asm_op "%I2" suffix " %0,%0,%2\n" \
" stwcx. %0,0,%3\n" \
" bne- 1b\n" \
: "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
- : "cc"); \
+ : "r"#sign (a), "r" (&v->counter) \
+ : "cc", ##__VA_ARGS__); \
\
return t; \
}
-#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...) \
static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
{ \
int res, t; \
\
__asm__ __volatile__( \
"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \
- #asm_op " %1,%3,%0\n" \
+ #asm_op "%I3" suffix " %1,%0,%3\n" \
" stwcx. %1,0,%4\n" \
" bne- 1b\n" \
: "=&r" (res), "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
- : "cc"); \
+ : "r"#sign (a), "r" (&v->counter) \
+ : "cc", ##__VA_ARGS__); \
\
return res; \
}
-#define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
- ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, suffix, sign, ...) \
+ ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__) \
+ ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, subf)
+ATOMIC_OPS(add, add, "c", I, "xer")
+ATOMIC_OPS(sub, sub, "c", I, "xer")
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
@@ -101,13 +101,13 @@ ATOMIC_OPS(sub, subf)
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, suffix, sign) \
+ ATOMIC_OP(op, asm_op, suffix, sign) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign)
-ATOMIC_OPS(and, and)
-ATOMIC_OPS(or, or)
-ATOMIC_OPS(xor, xor)
+ATOMIC_OPS(and, and, ".", K)
+ATOMIC_OPS(or, or, "", K)
+ATOMIC_OPS(xor, xor, "", K)
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
@@ -118,71 +118,6 @@ ATOMIC_OPS(xor, xor)
#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
-static __inline__ void arch_atomic_inc(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_inc\n\
- addic %0,%0,1\n"
-" stwcx. %0,0,%2 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-}
-#define arch_atomic_inc arch_atomic_inc
-
-static __inline__ int arch_atomic_inc_return_relaxed(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n"
-" addic %0,%0,1\n"
-" stwcx. %0,0,%2\n"
-" bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-
- return t;
-}
-
-static __inline__ void arch_atomic_dec(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_dec\n\
- addic %0,%0,-1\n"
-" stwcx. %0,0,%2\n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-}
-#define arch_atomic_dec arch_atomic_dec
-
-static __inline__ int arch_atomic_dec_return_relaxed(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n"
-" addic %0,%0,-1\n"
-" stwcx. %0,0,%2\n"
-" bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-
- return t;
-}
-
-#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed
-#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed
-
#define arch_atomic_cmpxchg(v, o, n) \
(arch_cmpxchg(&((v)->counter), (o), (n)))
#define arch_atomic_cmpxchg_relaxed(v, o, n) \
@@ -241,50 +176,20 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
"1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\
cmpw 0,%0,%3 \n\
beq 2f \n\
- add %0,%2,%0 \n"
+ add%I2c %0,%0,%2 \n"
" stwcx. %0,0,%1 \n\
bne- 1b \n"
PPC_ATOMIC_EXIT_BARRIER
-" subf %0,%2,%0 \n\
+" sub%I2c %0,%0,%2 \n\
2:"
: "=&r" (t)
- : "r" (&v->counter), "r" (a), "r" (u)
- : "cc", "memory");
+ : "r" (&v->counter), "rI" (a), "r" (u)
+ : "cc", "memory", "xer");
return t;
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
-/**
- * atomic_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
- */
-static __inline__ int arch_atomic_inc_not_zero(atomic_t *v)
-{
- int t1, t2;
-
- __asm__ __volatile__ (
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%2 # atomic_inc_not_zero\n\
- cmpwi 0,%0,0\n\
- beq- 2f\n\
- addic %1,%0,1\n"
-" stwcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ATOMIC_EXIT_BARRIER
- "\n\
-2:"
- : "=&r" (t1), "=&r" (t2)
- : "r" (&v->counter)
- : "cc", "xer", "memory");
-
- return t1;
-}
-#define arch_atomic_inc_not_zero(v) arch_atomic_inc_not_zero((v))
-
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1, even if
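
For reference, once the reworked macro above is expanded, ATOMIC_OPS(add, add, "c", I, "xer") generates roughly the function below (a sketch of the expansion, not a separate implementation). The "rI" constraint lets GCC pass a signed 16-bit constant directly as an operand, and the %I2 modifier then turns "addc" into "addic" for that case; the carrying form is used rather than addi because addi treats r0 as the constant zero, so the compiler stays free to allocate r0 for the result, at the cost of the extra "xer" clobber:

static __inline__ void arch_atomic_add(int a, atomic_t *v)
{
	int t;

	__asm__ __volatile__(
"1:	lwarx	%0,0,%3		# atomic_add\n"
"	add%I2c	%0,%0,%2\n"	/* addic ...,imm if 'a' is constant, else addc */
"	stwcx.	%0,0,%3\n"
"	bne-	1b\n"
	: "=&r" (t), "+m" (v->counter)
	: "rI" (a), "r" (&v->counter)
	: "cc", "xer");
}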
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 11847b6a244e..a05d8c62cbea 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -71,19 +71,61 @@ static inline void fn(unsigned long mask, \
__asm__ __volatile__ ( \
prefix \
"1:" PPC_LLARX "%0,0,%3,0\n" \
- stringify_in_c(op) "%0,%0,%2\n" \
+ #op "%I2 %0,%0,%2\n" \
PPC_STLCX "%0,0,%3\n" \
"bne- 1b\n" \
: "=&r" (old), "+m" (*p) \
- : "r" (mask), "r" (p) \
+ : "rK" (mask), "r" (p) \
: "cc", "memory"); \
}
DEFINE_BITOP(set_bits, or, "")
-DEFINE_BITOP(clear_bits, andc, "")
-DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER)
DEFINE_BITOP(change_bits, xor, "")
+static __always_inline bool is_rlwinm_mask_valid(unsigned long x)
+{
+ if (!x)
+ return false;
+ if (x & 1)
+ x = ~x; // make the mask non-wrapping
+ x += x & -x; // adding the low set bit results in at most one bit set
+
+ return !(x & (x - 1));
+}
+
+#define DEFINE_CLROP(fn, prefix) \
+static inline void fn(unsigned long mask, volatile unsigned long *_p) \
+{ \
+ unsigned long old; \
+ unsigned long *p = (unsigned long *)_p; \
+ \
+ if (IS_ENABLED(CONFIG_PPC32) && \
+ __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {\
+ asm volatile ( \
+ prefix \
+ "1:" "lwarx %0,0,%3\n" \
+ "rlwinm %0,%0,0,%2\n" \
+ "stwcx. %0,0,%3\n" \
+ "bne- 1b\n" \
+ : "=&r" (old), "+m" (*p) \
+ : "n" (~mask), "r" (p) \
+ : "cc", "memory"); \
+ } else { \
+ asm volatile ( \
+ prefix \
+ "1:" PPC_LLARX "%0,0,%3,0\n" \
+ "andc %0,%0,%2\n" \
+ PPC_STLCX "%0,0,%3\n" \
+ "bne- 1b\n" \
+ : "=&r" (old), "+m" (*p) \
+ : "r" (mask), "r" (p) \
+ : "cc", "memory"); \
+ } \
+}
+
+DEFINE_CLROP(clear_bits, "")
+DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER)
+
static inline void arch_set_bit(int nr, volatile unsigned long *addr)
{
set_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
@@ -116,12 +158,12 @@ static inline unsigned long fn( \
__asm__ __volatile__ ( \
prefix \
"1:" PPC_LLARX "%0,0,%3,%4\n" \
- stringify_in_c(op) "%1,%0,%2\n" \
+ #op "%I2 %1,%0,%2\n" \
PPC_STLCX "%1,0,%3\n" \
"bne- 1b\n" \
postfix \
: "=&r" (old), "=&r" (t) \
- : "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \
+ : "rK" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \
: "cc", "memory"); \
return (old & mask); \
}
@@ -130,11 +172,42 @@ DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
PPC_ATOMIC_EXIT_BARRIER, 0)
DEFINE_TESTOP(test_and_set_bits_lock, or, "",
PPC_ACQUIRE_BARRIER, 1)
-DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
- PPC_ATOMIC_EXIT_BARRIER, 0)
DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
PPC_ATOMIC_EXIT_BARRIER, 0)
+static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p)
+{
+ unsigned long old, t;
+ unsigned long *p = (unsigned long *)_p;
+
+ if (IS_ENABLED(CONFIG_PPC32) &&
+ __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {
+ asm volatile (
+ PPC_ATOMIC_ENTRY_BARRIER
+ "1:" "lwarx %0,0,%3\n"
+ "rlwinm %1,%0,0,%2\n"
+ "stwcx. %1,0,%3\n"
+ "bne- 1b\n"
+ PPC_ATOMIC_EXIT_BARRIER
+ : "=&r" (old), "=&r" (t)
+ : "n" (~mask), "r" (p)
+ : "cc", "memory");
+ } else {
+ asm volatile (
+ PPC_ATOMIC_ENTRY_BARRIER
+ "1:" PPC_LLARX "%0,0,%3,0\n"
+ "andc %1,%0,%2\n"
+ PPC_STLCX "%1,0,%3\n"
+ "bne- 1b\n"
+ PPC_ATOMIC_EXIT_BARRIER
+ : "=&r" (old), "=&r" (t)
+ : "r" (mask), "r" (p)
+ : "cc", "memory");
+ }
+
+ return (old & mask);
+}
+
static inline int arch_test_and_set_bit(unsigned long nr,
volatile unsigned long *addr)
{
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 9f38040f0641..678f9c9d89b6 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -12,50 +12,10 @@
#include <linux/jump_label.h>
extern struct static_key_false disable_kuap_key;
-extern struct static_key_false disable_kuep_key;
-
-static __always_inline bool kuap_is_disabled(void)
-{
- return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key);
-}
static __always_inline bool kuep_is_disabled(void)
{
- return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key);
-}
-
-static inline void kuep_lock(void)
-{
- if (kuep_is_disabled())
- return;
-
- update_user_segments(mfsr(0) | SR_NX);
- /*
- * This isync() shouldn't be necessary as the kernel is not excepted to
- * run any instruction in userspace soon after the update of segments,
- * but hash based cores (at least G3) seem to exhibit a random
- * behaviour when the 'isync' is not there. 603 cores don't have this
- * behaviour so don't do the 'isync' as it saves several CPU cycles.
- */
- if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
- isync(); /* Context sync required after mtsr() */
-}
-
-static inline void kuep_unlock(void)
-{
- if (kuep_is_disabled())
- return;
-
- update_user_segments(mfsr(0) & ~SR_NX);
- /*
- * This isync() shouldn't be necessary as a 'rfi' will soon be executed
- * to return to userspace, but hash based cores (at least G3) seem to
- * exhibit a random behaviour when the 'isync' is not there. 603 cores
- * don't have this behaviour so don't do the 'isync' as it saves several
- * CPU cycles.
- */
- if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
- isync(); /* Context sync required after mtsr() */
+ return !IS_ENABLED(CONFIG_PPC_KUEP);
}
#ifdef CONFIG_PPC_KUAP
@@ -65,6 +25,11 @@ static inline void kuep_unlock(void)
#define KUAP_NONE (~0UL)
#define KUAP_ALL (~1UL)
+static __always_inline bool kuap_is_disabled(void)
+{
+ return static_branch_unlikely(&disable_kuap_key);
+}
+
static inline void kuap_lock_one(unsigned long addr)
{
mtsr(mfsr(addr) | SR_KS, addr);
@@ -92,7 +57,7 @@ static inline void kuap_unlock_all(void)
void kuap_lock_all_ool(void);
void kuap_unlock_all_ool(void);
-static inline void kuap_lock(unsigned long addr, bool ool)
+static inline void kuap_lock_addr(unsigned long addr, bool ool)
{
if (likely(addr != KUAP_ALL))
kuap_lock_one(addr);
@@ -112,33 +77,31 @@ static inline void kuap_unlock(unsigned long addr, bool ool)
kuap_unlock_all_ool();
}
-static inline void kuap_save_and_lock(struct pt_regs *regs)
+static inline void __kuap_lock(void)
{
- unsigned long kuap = current->thread.kuap;
+}
- if (kuap_is_disabled())
- return;
+static inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+ unsigned long kuap = current->thread.kuap;
regs->kuap = kuap;
if (unlikely(kuap == KUAP_NONE))
return;
current->thread.kuap = KUAP_NONE;
- kuap_lock(kuap, false);
+ kuap_lock_addr(kuap, false);
}
static inline void kuap_user_restore(struct pt_regs *regs)
{
}
-static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
{
- if (kuap_is_disabled())
- return;
-
if (unlikely(kuap != KUAP_NONE)) {
current->thread.kuap = KUAP_NONE;
- kuap_lock(kuap, false);
+ kuap_lock_addr(kuap, false);
}
if (likely(regs->kuap == KUAP_NONE))
@@ -149,29 +112,18 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
kuap_unlock(regs->kuap, false);
}
-static inline unsigned long kuap_get_and_assert_locked(void)
+static inline unsigned long __kuap_get_and_assert_locked(void)
{
unsigned long kuap = current->thread.kuap;
- if (kuap_is_disabled())
- return KUAP_NONE;
-
WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE);
return kuap;
}
-static inline void kuap_assert_locked(void)
-{
- kuap_get_and_assert_locked();
-}
-
-static __always_inline void allow_user_access(void __user *to, const void __user *from,
- u32 size, unsigned long dir)
+static __always_inline void __allow_user_access(void __user *to, const void __user *from,
+ u32 size, unsigned long dir)
{
- if (kuap_is_disabled())
- return;
-
BUILD_BUG_ON(!__builtin_constant_p(dir));
if (!(dir & KUAP_WRITE))
@@ -181,42 +133,33 @@ static __always_inline void allow_user_access(void __user *to, const void __user
kuap_unlock_one((__force u32)to);
}
-static __always_inline void prevent_user_access(unsigned long dir)
+static __always_inline void __prevent_user_access(unsigned long dir)
{
u32 kuap = current->thread.kuap;
- if (kuap_is_disabled())
- return;
-
BUILD_BUG_ON(!__builtin_constant_p(dir));
if (!(dir & KUAP_WRITE))
return;
current->thread.kuap = KUAP_NONE;
- kuap_lock(kuap, true);
+ kuap_lock_addr(kuap, true);
}
-static inline unsigned long prevent_user_access_return(void)
+static inline unsigned long __prevent_user_access_return(void)
{
unsigned long flags = current->thread.kuap;
- if (kuap_is_disabled())
- return KUAP_NONE;
-
if (flags != KUAP_NONE) {
current->thread.kuap = KUAP_NONE;
- kuap_lock(flags, true);
+ kuap_lock_addr(flags, true);
}
return flags;
}
-static inline void restore_user_access(unsigned long flags)
+static inline void __restore_user_access(unsigned long flags)
{
- if (kuap_is_disabled())
- return;
-
if (flags != KUAP_NONE) {
current->thread.kuap = flags;
kuap_unlock(flags, true);
@@ -224,13 +167,10 @@ static inline void restore_user_access(unsigned long flags)
}
static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
unsigned long kuap = regs->kuap;
- if (kuap_is_disabled())
- return false;
-
if (!is_write || kuap == KUAP_ALL)
return false;
if (kuap == KUAP_NONE)
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index f5be185cbdf8..7be27862329f 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -64,7 +64,82 @@ struct ppc_bat {
#define SR_KP 0x20000000 /* User key */
#define SR_KS 0x40000000 /* Supervisor key */
-#ifndef __ASSEMBLY__
+#ifdef __ASSEMBLY__
+
+#include <asm/asm-offsets.h>
+
+.macro uus_addi sr reg1 reg2 imm
+ .if NUM_USER_SEGMENTS > \sr
+ addi \reg1,\reg2,\imm
+ .endif
+.endm
+
+.macro uus_mtsr sr reg1
+ .if NUM_USER_SEGMENTS > \sr
+ mtsr \sr, \reg1
+ .endif
+.endm
+
+/*
+ * This isync() shouldn't be necessary as the kernel is not excepted to run
+ * any instruction in userspace soon after the update of segments and 'rfi'
+ * instruction is used to return to userspace, but hash based cores
+ * (at least G3) seem to exhibit a random behaviour when the 'isync' is not
+ * there. 603 cores don't have this behaviour so don't do the 'isync' as it
+ * saves several CPU cycles.
+ */
+.macro uus_isync
+#ifdef CONFIG_PPC_BOOK3S_604
+BEGIN_MMU_FTR_SECTION
+ isync
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
+.endm
+
+.macro update_user_segments_by_4 tmp1 tmp2 tmp3 tmp4
+ uus_addi 1, \tmp2, \tmp1, 0x111
+ uus_addi 2, \tmp3, \tmp1, 0x222
+ uus_addi 3, \tmp4, \tmp1, 0x333
+
+ uus_mtsr 0, \tmp1
+ uus_mtsr 1, \tmp2
+ uus_mtsr 2, \tmp3
+ uus_mtsr 3, \tmp4
+
+ uus_addi 4, \tmp1, \tmp1, 0x444
+ uus_addi 5, \tmp2, \tmp2, 0x444
+ uus_addi 6, \tmp3, \tmp3, 0x444
+ uus_addi 7, \tmp4, \tmp4, 0x444
+
+ uus_mtsr 4, \tmp1
+ uus_mtsr 5, \tmp2
+ uus_mtsr 6, \tmp3
+ uus_mtsr 7, \tmp4
+
+ uus_addi 8, \tmp1, \tmp1, 0x444
+ uus_addi 9, \tmp2, \tmp2, 0x444
+ uus_addi 10, \tmp3, \tmp3, 0x444
+ uus_addi 11, \tmp4, \tmp4, 0x444
+
+ uus_mtsr 8, \tmp1
+ uus_mtsr 9, \tmp2
+ uus_mtsr 10, \tmp3
+ uus_mtsr 11, \tmp4
+
+ uus_addi 12, \tmp1, \tmp1, 0x444
+ uus_addi 13, \tmp2, \tmp2, 0x444
+ uus_addi 14, \tmp3, \tmp3, 0x444
+ uus_addi 15, \tmp4, \tmp4, 0x444
+
+ uus_mtsr 12, \tmp1
+ uus_mtsr 13, \tmp2
+ uus_mtsr 14, \tmp3
+ uus_mtsr 15, \tmp4
+
+ uus_isync
+.endm
+
+#else
/*
* This macro defines the mapping from contexts to VSIDs (virtual
@@ -100,9 +175,14 @@ struct hash_pte {
typedef struct {
unsigned long id;
+ unsigned long sr0;
void __user *vdso;
} mm_context_t;
+#ifdef CONFIG_PPC_KUEP
+#define INIT_MM_CONTEXT(mm) .context.sr0 = SR_NX
+#endif
+
void update_bats(void);
static inline void cleanup_cpu_mmu_context(void) { }
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 674fe0e890dc..a7a0572f3846 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -99,10 +99,6 @@
* Defines the address of the vmemap area, in its own region on
* hash table CPUs.
*/
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* CONFIG_PPC_MM_SLICES */
/* PTEIDX nibble */
#define _PTEIDX_SECONDARY 0x8
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 170339969b7c..69fcf63eec94 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -229,6 +229,11 @@ static inline u64 current_thread_iamr(void)
#ifdef CONFIG_PPC_KUAP
+static __always_inline bool kuap_is_disabled(void)
+{
+ return !mmu_has_feature(MMU_FTR_BOOK3S_KUAP);
+}
+
static inline void kuap_user_restore(struct pt_regs *regs)
{
bool restore_amr = false, restore_iamr = false;
@@ -268,40 +273,38 @@ static inline void kuap_user_restore(struct pt_regs *regs)
*/
}
-static inline void kuap_kernel_restore(struct pt_regs *regs,
- unsigned long amr)
+static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
{
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
- if (unlikely(regs->amr != amr)) {
- isync();
- mtspr(SPRN_AMR, regs->amr);
- /*
- * No isync required here because we are about to rfi
- * back to previous context before any user accesses
- * would be made, which is a CSI.
- */
- }
- }
+ if (likely(regs->amr == amr))
+ return;
+
+ isync();
+ mtspr(SPRN_AMR, regs->amr);
/*
+ * No isync required here because we are about to rfi
+ * back to previous context before any user accesses
+ * would be made, which is a CSI.
+ *
* No need to restore IAMR when returning to kernel space.
*/
}
-static inline unsigned long kuap_get_and_assert_locked(void)
+static inline unsigned long __kuap_get_and_assert_locked(void)
{
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
- unsigned long amr = mfspr(SPRN_AMR);
- if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */
- WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED);
- return amr;
- }
- return 0;
+ unsigned long amr = mfspr(SPRN_AMR);
+
+ if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */
+ WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED);
+ return amr;
}
-static inline void kuap_assert_locked(void)
+/* Do nothing, book3s/64 does that in ASM */
+static inline void __kuap_lock(void)
+{
+}
+
+static inline void __kuap_save_and_lock(struct pt_regs *regs)
{
- if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
- WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
}
/*
@@ -339,11 +342,8 @@ static inline void set_kuap(unsigned long value)
isync();
}
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
- bool is_write)
+static inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
- if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
- return false;
/*
* For radix this will be a storage protection fault (DSISR_PROTFAULT).
* For hash this will be a key fault (DSISR_KEYFAULT)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 3004f3323144..21f780942911 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -523,8 +523,14 @@ void slb_save_contents(struct slb_entry *slb_ptr);
void slb_dump_contents(struct slb_entry *slb_ptr);
extern void slb_vmalloc_update(void);
-extern void slb_set_size(u16 size);
void preload_new_slb_context(unsigned long start, unsigned long sp);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void slb_set_size(u16 size);
+#else
+static inline void slb_set_size(u16 size) { }
+#endif
+
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index c02f42d1031e..ba5b1becf518 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -4,6 +4,12 @@
#include <asm/page.h>
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
#ifndef __ASSEMBLY__
/*
* Page size definition
@@ -62,6 +68,9 @@ extern struct patb_entry *partition_tb;
#define PRTS_MASK 0x1f /* process table size field */
#define PRTB_MASK 0x0ffffffffffff000UL
+/* Number of supported LPID bits */
+extern unsigned int mmu_lpid_bits;
+
/* Number of supported PID bits */
extern unsigned int mmu_pid_bits;
@@ -76,10 +85,8 @@ extern unsigned long __ro_after_init radix_mem_block_size;
#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
#define PRTB_ENTRIES (1ul << mmu_pid_bits)
-/*
- * Power9 currently only support 64K partition table size.
- */
-#define PATB_SIZE_SHIFT 16
+#define PATB_SIZE_SHIFT (mmu_lpid_bits + 4)
+#define PATB_ENTRIES (1ul << mmu_lpid_bits)
typedef unsigned long mm_context_id_t;
struct spinlock;
@@ -98,7 +105,9 @@ typedef struct {
* from EA and new context ids to build the new VAs.
*/
mm_context_id_t id;
+#ifdef CONFIG_PPC_64S_HASH_MMU
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+#endif
};
/* Number of bits in the mm_cpumask */
@@ -110,7 +119,9 @@ typedef struct {
/* Number of user space windows opened in process mm_context */
atomic_t vas_windows;
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct hash_mm_context *hash_context;
+#endif
void __user *vdso;
/*
@@ -133,6 +144,7 @@ typedef struct {
#endif
} mm_context_t;
+#ifdef CONFIG_PPC_64S_HASH_MMU
static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
{
return ctx->hash_context->user_psize;
@@ -193,8 +205,15 @@ static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
extern int mmu_linear_psize;
extern int mmu_virtual_psize;
extern int mmu_vmalloc_psize;
-extern int mmu_vmemmap_psize;
extern int mmu_io_psize;
+#else /* CONFIG_PPC_64S_HASH_MMU */
+#ifdef CONFIG_PPC_64K_PAGES
+#define mmu_virtual_psize MMU_PAGE_64K
+#else
+#define mmu_virtual_psize MMU_PAGE_4K
+#endif
+#endif
+extern int mmu_vmemmap_psize;
/* MMU initialization */
void mmu_early_init_devtree(void);
@@ -233,12 +252,13 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
* know which translations we will pick. Hence go with hash
* restrictions.
*/
- return hash__setup_initial_memory_limit(first_memblock_base,
- first_memblock_size);
+ if (!early_radix_enabled())
+ hash__setup_initial_memory_limit(first_memblock_base,
+ first_memblock_size);
}
#ifdef CONFIG_PPC_PSERIES
-extern void radix_init_pseries(void);
+void __init radix_init_pseries(void);
#else
static inline void radix_init_pseries(void) { }
#endif
@@ -255,6 +275,7 @@ static inline void radix_init_pseries(void) { }
void cleanup_cpu_mmu_context(void);
#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
{
int index = ea >> MAX_EA_BITS_PER_CONTEXT;
@@ -274,6 +295,7 @@ static inline unsigned long get_user_vsid(mm_context_t *ctx,
return get_vsid(context, ea, ssize);
}
+#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
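
The removed hard-coded value is recovered by the new formula: each partition-table entry (struct patb_entry) is 16 bytes, hence the "+ 4" in PATB_SIZE_SHIFT. With 12 LPID bits, for example, PATB_ENTRIES = 1 << 12 = 4096 entries and the table occupies 4096 * 16 bytes = 64 KiB, i.e. PATB_SIZE_SHIFT = 16, matching the former constant.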
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 3b95769739c7..8b762f282190 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -112,8 +112,14 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start,
struct mmu_gather;
extern void hash__tlb_flush(struct mmu_gather *tlb);
+void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Private function for use by PCI IO mapping code */
extern void __flush_hash_table_range(unsigned long start, unsigned long end);
extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr);
+#else
+static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { }
+#endif
#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 215973b4cb26..d2e80f178b6d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -14,7 +14,6 @@ enum {
TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
};
-#ifdef CONFIG_PPC_NATIVE
static inline void tlbiel_all(void)
{
/*
@@ -30,9 +29,6 @@ static inline void tlbiel_all(void)
else
hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
}
-#else
-static inline void tlbiel_all(void) { BUG(); }
-#endif
static inline void tlbiel_all_lpid(bool radix)
{
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index ad130e15a126..e8269434ecbe 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -25,6 +25,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
#define __HAVE_PHYS_MEM_ACCESS_PROT
+#if defined(CONFIG_PPC32) || defined(CONFIG_PPC_64S_HASH_MMU)
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
@@ -35,6 +36,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
* waiting for the inevitable extra hash-table miss exception.
*/
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+#else
+static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
+#endif
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h
index 461b0f193864..860f8868f11e 100644
--- a/arch/powerpc/include/asm/btext.h
+++ b/arch/powerpc/include/asm/btext.h
@@ -23,12 +23,12 @@ extern void btext_unmap(void);
extern void btext_drawchar(char c);
extern void btext_drawstring(const char *str);
-extern void btext_drawhex(unsigned long v);
-extern void btext_drawtext(const char *c, unsigned int len);
+void __init btext_drawhex(unsigned long v);
+void __init btext_drawtext(const char *c, unsigned int len);
-extern void btext_clearscreen(void);
-extern void btext_flushscreen(void);
-extern void btext_flushline(void);
+void __init btext_clearscreen(void);
+void __init btext_flushscreen(void);
+void __init btext_flushline(void);
#endif /* __KERNEL__ */
#endif /* __PPC_BTEXT_H */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 4ba834599c4d..e26080539c31 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -24,20 +24,20 @@
bool is_offset_in_branch_range(long offset);
bool is_offset_in_cond_branch_range(long offset);
-int create_branch(struct ppc_inst *instr, const u32 *addr,
+int create_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags);
-int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags);
int patch_branch(u32 *addr, unsigned long target, int flags);
-int patch_instruction(u32 *addr, struct ppc_inst instr);
-int raw_patch_instruction(u32 *addr, struct ppc_inst instr);
+int patch_instruction(u32 *addr, ppc_inst_t instr);
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
static inline unsigned long patch_site_addr(s32 *site)
{
return (unsigned long)site + *site;
}
-static inline int patch_instruction_site(s32 *site, struct ppc_inst instr)
+static inline int patch_instruction_site(s32 *site, ppc_inst_t instr)
{
return patch_instruction((u32 *)patch_site_addr(site), instr);
}
@@ -58,18 +58,26 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned
return modify_instruction((unsigned int *)patch_site_addr(site), clr, set);
}
-int instr_is_relative_branch(struct ppc_inst instr);
-int instr_is_relative_link_branch(struct ppc_inst instr);
+static inline unsigned int branch_opcode(ppc_inst_t instr)
+{
+ return ppc_inst_primary_opcode(instr) & 0x3F;
+}
+
+static inline int instr_is_branch_iform(ppc_inst_t instr)
+{
+ return branch_opcode(instr) == 18;
+}
+
+static inline int instr_is_branch_bform(ppc_inst_t instr)
+{
+ return branch_opcode(instr) == 16;
+}
+
+int instr_is_relative_branch(ppc_inst_t instr);
+int instr_is_relative_link_branch(ppc_inst_t instr);
unsigned long branch_target(const u32 *instr);
-int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src);
-extern bool is_conditional_branch(struct ppc_inst instr);
-#ifdef CONFIG_PPC_BOOK3E_64
-void __patch_exception(int exc, unsigned long addr);
-#define patch_exception(exc, name) do { \
- extern unsigned int name; \
- __patch_exception((exc), (unsigned long)&name); \
-} while (0)
-#endif
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
+bool is_conditional_branch(ppc_inst_t instr);
#define OP_RT_RA_MASK 0xffff0000UL
#define LIS_R2 (PPC_RAW_LIS(_R2, 0))
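
For illustration, a minimal standalone sketch (plain C, not the kernel code) of what the new branch_opcode()/instr_is_branch_iform()/instr_is_branch_bform() helpers compute: the primary opcode is the top six bits of the instruction word, 18 for I-form (b/bl) and 16 for B-form (conditional) branches. The instruction encodings used below are illustrative.

/* Standalone model: classify a 32-bit PowerPC instruction by primary opcode. */
#include <stdio.h>
#include <stdint.h>

static unsigned int primary_opcode(uint32_t insn) { return insn >> 26; }
static int is_branch_iform(uint32_t insn) { return primary_opcode(insn) == 18; }
static int is_branch_bform(uint32_t insn) { return primary_opcode(insn) == 16; }

int main(void)
{
	uint32_t b_rel = 0x48000010;	/* "b +0x10"  -> opcode 18, I-form */
	uint32_t bne   = 0x40820008;	/* "bne +0x8" -> opcode 16, B-form */

	printf("%08x: iform=%d bform=%d\n", (unsigned)b_rel, is_branch_iform(b_rel), is_branch_bform(b_rel));
	printf("%08x: iform=%d bform=%d\n", (unsigned)bne,   is_branch_iform(bne),   is_branch_bform(bne));
	return 0;
}
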
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index bda45788cfcc..9ee192a6c5d7 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -1133,8 +1133,8 @@ enum cpm_clk {
CPM_CLK_DUMMY
};
-extern int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode);
-extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock);
+int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock);
#define CPM_PIN_INPUT 0
#define CPM_PIN_OUTPUT 1
@@ -1143,7 +1143,7 @@ extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock);
#define CPM_PIN_GPIO 4
#define CPM_PIN_OPENDRAIN 8
-void cpm2_set_pin(int port, int pin, int flags);
+void __init cpm2_set_pin(int port, int pin, int flags);
#endif /* __CPM2__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 9844b3ded187..0cce5dc7fb1c 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -85,7 +85,7 @@ extern struct pnv_idle_states_t *pnv_idle_states;
extern int nr_pnv_idle_states;
unsigned long pnv_cpu_offline(unsigned int cpu);
-int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
+int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
static inline void report_invalid_psscr_val(u64 psscr_val, int err)
{
switch (err) {
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index b167186aaee4..f26c430f3982 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -32,44 +32,11 @@ extern cpumask_t threads_core_mask;
#define threads_core_mask (*get_cpu_mask(0))
#endif
-/* cpu_thread_mask_to_cores - Return a cpumask of one per cores
- * hit by the argument
- *
- * @threads: a cpumask of online threads
- *
- * This function returns a cpumask which will have one online cpu's
- * bit set for each core that has at least one thread set in the argument.
- *
- * This can typically be used for things like IPI for tlb invalidations
- * since those need to be done only once per core/TLB
- */
-static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
-{
- cpumask_t tmp, res;
- int i, cpu;
-
- cpumask_clear(&res);
- for (i = 0; i < NR_CPUS; i += threads_per_core) {
- cpumask_shift_left(&tmp, &threads_core_mask, i);
- if (cpumask_intersects(threads, &tmp)) {
- cpu = cpumask_next_and(-1, &tmp, cpu_online_mask);
- if (cpu < nr_cpu_ids)
- cpumask_set_cpu(cpu, &res);
- }
- }
- return res;
-}
-
static inline int cpu_nr_cores(void)
{
return nr_cpu_ids >> threads_shift;
}
-static inline cpumask_t cpu_online_cores_map(void)
-{
- return cpu_thread_mask_to_cores(cpu_online_mask);
-}
-
#ifdef CONFIG_SMP
int cpu_core_index_of_thread(int cpu);
int cpu_first_thread_of_core(int core);
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index b1a5bba2e0b9..bd513fd49be9 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -460,7 +460,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
}
-void eeh_cache_debugfs_init(void);
+void __init eeh_cache_debugfs_init(void);
#endif /* CONFIG_PPC64 */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index 40cdcb2fb057..b1ef1e92c34a 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -149,6 +149,10 @@ exc_##label##_book3e:
addi r11,r13,PACA_EXTLB; \
TLB_MISS_RESTORE(r11)
+#ifndef __ASSEMBLY__
+extern unsigned int interrupt_base_book3e;
+#endif
+
#define SET_IVOR(vector_number, vector_offset) \
LOAD_REG_ADDR(r3,interrupt_base_book3e);\
ori r3,r3,vector_offset@l; \
diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h
index 8d61c8f3fec4..52189928ec08 100644
--- a/arch/powerpc/include/asm/fadump-internal.h
+++ b/arch/powerpc/include/asm/fadump-internal.h
@@ -137,10 +137,10 @@ struct fadump_ops {
};
/* Helper functions */
-s32 fadump_setup_cpu_notes_buf(u32 num_cpus);
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus);
void fadump_free_cpu_notes_buf(void);
-u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
-void fadump_update_elfcore_header(char *bufp);
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
+void __init fadump_update_elfcore_header(char *bufp);
bool is_fadump_boot_mem_contiguous(void);
bool is_fadump_reserved_mem_contiguous(void);
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 97a3bd9ffeb9..9b702d2b80fb 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -80,8 +80,6 @@ enum {
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
- FW_FEATURE_NATIVE_POSSIBLE = 0,
- FW_FEATURE_NATIVE_ALWAYS = 0,
FW_FEATURE_POSSIBLE =
#ifdef CONFIG_PPC_PSERIES
FW_FEATURE_PSERIES_POSSIBLE |
@@ -92,9 +90,6 @@ enum {
#ifdef CONFIG_PPC_PS3
FW_FEATURE_PS3_POSSIBLE |
#endif
-#ifdef CONFIG_PPC_NATIVE
- FW_FEATURE_NATIVE_ALWAYS |
-#endif
0,
FW_FEATURE_ALWAYS =
#ifdef CONFIG_PPC_PSERIES
@@ -106,9 +101,6 @@ enum {
#ifdef CONFIG_PPC_PS3
FW_FEATURE_PS3_ALWAYS &
#endif
-#ifdef CONFIG_PPC_NATIVE
- FW_FEATURE_NATIVE_ALWAYS &
-#endif
FW_FEATURE_POSSIBLE,
#else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h
index 7af9a68fd949..f8ce178b43b7 100644
--- a/arch/powerpc/include/asm/floppy.h
+++ b/arch/powerpc/include/asm/floppy.h
@@ -134,17 +134,19 @@ static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
int dir;
doing_vdma = 0;
- dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
+ dir = (mode == DMA_MODE_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
if (bus_addr
&& (addr != prev_addr || size != prev_size || dir != prev_dir)) {
/* different from last time -- unmap prev */
- pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir);
+ dma_unmap_single(&isa_bridge_pcidev->dev, bus_addr, prev_size,
+ prev_dir);
bus_addr = 0;
}
if (!bus_addr) /* need to map it */
- bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir);
+ bus_addr = dma_map_single(&isa_bridge_pcidev->dev, addr, size,
+ dir);
/* remember this one as prev */
prev_addr = addr;
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index 242204e12993..d73153b0275d 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -98,13 +98,9 @@ start_text:
. = sname##_len;
#define USE_FIXED_SECTION(sname) \
- fs_label = start_##sname; \
- fs_start = sname##_start; \
use_ftsec sname;
#define USE_TEXT_SECTION() \
- fs_label = start_text; \
- fs_start = text_start; \
.text
#define CLOSE_FIXED_SECTION(sname) \
@@ -161,13 +157,15 @@ name:
* - ABS_ADDR is used to find the absolute address of any symbol, from within
* a fixed section.
*/
-#define DEFINE_FIXED_SYMBOL(label) \
- label##_absolute = (label - fs_label + fs_start)
+// define label as being _in_ sname
+#define DEFINE_FIXED_SYMBOL(label, sname) \
+ label##_absolute = (label - start_ ## sname + sname ## _start)
#define FIXED_SYMBOL_ABS_ADDR(label) \
(label##_absolute)
-#define ABS_ADDR(label) (label - fs_label + fs_start)
+// find label from _within_ sname
+#define ABS_ADDR(label, sname) (label - start_ ## sname + sname ## _start)
#endif /* __ASSEMBLY__ */
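
As a hedged illustration of what the reworked macros compute (the numbers below are made up), DEFINE_FIXED_SYMBOL(label, sname) now resolves label_absolute per named section instead of relying on the fs_label/fs_start state that USE_FIXED_SECTION used to set up:

/* Standalone model of the arithmetic behind
 * DEFINE_FIXED_SYMBOL(label, sname): label - start_##sname + sname##_start. */
#include <stdio.h>

int main(void)
{
	unsigned long start_sname = 0x0;	/* section's linked start (start_##sname) */
	unsigned long sname_start = 0x100;	/* section's fixed/absolute start (sname##_start) */
	unsigned long label       = 0x1c;	/* label address inside the section */

	unsigned long label_absolute = label - start_sname + sname_start;

	printf("label_absolute = 0x%lx\n", label_absolute);	/* 0x11c */
	return 0;
}
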
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index f18c543bc01d..962708fa1017 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -15,7 +15,7 @@
extern bool hugetlb_disabled;
-void hugetlbpage_init_default(void);
+void __init hugetlbpage_init_default(void);
int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len);
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index abebfbee5b1c..84d39fd42f71 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
#define _PPC_BOOK3S_64_HW_BREAKPOINT_H
#include <asm/cpu_has_feature.h>
-#include <asm/inst.h>
#ifdef __KERNEL__
struct arch_hw_breakpoint {
@@ -56,11 +55,11 @@ static inline int nr_wp_slots(void)
return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
}
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
unsigned long ea, int type, int size,
struct arch_hw_breakpoint *info);
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
int *type, int *size, unsigned long *ea);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 21cc571ea9c2..a58fb4aa6c81 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -61,7 +61,7 @@
static inline void __hard_irq_enable(void)
{
- if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+ if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
wrtee(MSR_EE);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_EIE);
@@ -73,7 +73,7 @@ static inline void __hard_irq_enable(void)
static inline void __hard_irq_disable(void)
{
- if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+ if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
wrtee(0);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_EID);
@@ -85,7 +85,7 @@ static inline void __hard_irq_disable(void)
static inline void __hard_EE_RI_disable(void)
{
- if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+ if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
wrtee(0);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_NRI);
@@ -97,7 +97,7 @@ static inline void __hard_EE_RI_disable(void)
static inline void __hard_RI_enable(void)
{
- if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+ if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
return;
if (IS_ENABLED(CONFIG_PPC_8xx))
@@ -224,6 +224,42 @@ static inline bool arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
+static inline void set_pmi_irq_pending(void)
+{
+ /*
+ * Invoked from PMU callback functions to set PMI bit in the paca.
+ * This has to be called with irq's disabled (via hard_irq_disable()).
+ */
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ get_paca()->irq_happened |= PACA_IRQ_PMI;
+}
+
+static inline void clear_pmi_irq_pending(void)
+{
+ /*
+ * Invoked from PMU callback functions to clear the pending PMI bit
+ * in the paca.
+ */
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ get_paca()->irq_happened &= ~PACA_IRQ_PMI;
+}
+
+static inline bool pmi_irq_pending(void)
+{
+ /*
+ * Invoked from PMU callback functions to check if there is a pending
+ * PMI bit in the paca.
+ */
+ if (get_paca()->irq_happened & PACA_IRQ_PMI)
+ return true;
+
+ return false;
+}
+
#ifdef CONFIG_PPC_BOOK3S
/*
* To support disabling and enabling of irq with PMI, set of
@@ -306,18 +342,57 @@ static inline bool lazy_irq_pending_nocheck(void)
return __lazy_irq_pending(local_paca->irq_happened);
}
+bool power_pmu_wants_prompt_pmi(void);
+
+/*
+ * This is called by asynchronous interrupts to check whether to
+ * conditionally re-enable hard interrupts after having cleared
+ * the source of the interrupt. They are kept disabled if there
+ * is a different soft-masked interrupt pending that requires hard
+ * masking.
+ */
+static inline bool should_hard_irq_enable(void)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+ WARN_ON(mfmsr() & MSR_EE);
+#endif
+#ifdef CONFIG_PERF_EVENTS
+ /*
+ * If the PMU is not running, there is not much reason to enable
+ * MSR[EE] in irq handlers because any interrupts would just be
+ * soft-masked.
+ *
+ * TODO: Add test for 64e
+ */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi())
+ return false;
+
+ if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
/*
- * This is called by asynchronous interrupts to conditionally
- * re-enable hard interrupts after having cleared the source
- * of the interrupt. They are kept disabled if there is a different
- * soft-masked interrupt pending that requires hard masking.
+ * Do the hard enabling; only call this if should_hard_irq_enable() returns true.
*/
-static inline void may_hard_irq_enable(void)
+static inline void do_hard_irq_enable(void)
{
- if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) {
- get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
- __hard_irq_enable();
- }
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+ WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK);
+ WARN_ON(mfmsr() & MSR_EE);
+#endif
+ /*
+ * This allows PMI interrupts (and watchdog soft-NMIs) through.
+ * There is no other reason to enable this way.
+ */
+ get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ __hard_irq_enable();
}
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
@@ -398,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
-static inline bool may_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(void)
{
return false;
}
@@ -408,6 +483,10 @@ static inline void do_hard_irq_enable(void)
BUILD_BUG();
}
+static inline void clear_pmi_irq_pending(void) { }
+static inline void set_pmi_irq_pending(void) { }
+static inline bool pmi_irq_pending(void) { return false; }
+
static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
{
}
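
The old may_hard_irq_enable() both tested and enabled; the split above lets callers test first and enable explicitly. A standalone sketch of the test alone (flag values below are illustrative, not the real PACA_IRQ_* masks):

/* Standalone model of the decision the new should_hard_irq_enable() makes:
 * hard-enable MSR[EE] only when a PMU user wants prompt PMIs and no pending
 * soft-masked source requires hard masking. */
#include <stdbool.h>
#include <stdio.h>

#define PACA_IRQ_HARD_DIS	0x01	/* illustrative values only */
#define PACA_IRQ_PMI		0x02
#define PACA_IRQ_MUST_HARD_MASK	0x04

static bool should_hard_irq_enable(unsigned char irq_happened, bool pmu_wants_prompt_pmi)
{
	if (!pmu_wants_prompt_pmi)	/* no PMU user: enabling MSR[EE] buys nothing */
		return false;
	if (irq_happened & PACA_IRQ_MUST_HARD_MASK)
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", should_hard_irq_enable(PACA_IRQ_HARD_DIS, true));				/* 1 */
	printf("%d\n", should_hard_irq_enable(PACA_IRQ_HARD_DIS | PACA_IRQ_MUST_HARD_MASK, true));	/* 0 */
	printf("%d\n", should_hard_irq_enable(PACA_IRQ_HARD_DIS, false));				/* 0 */
	return 0;
}
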
diff --git a/arch/powerpc/include/asm/i8259.h b/arch/powerpc/include/asm/i8259.h
index d7f08ae49e12..75481d363cd8 100644
--- a/arch/powerpc/include/asm/i8259.h
+++ b/arch/powerpc/include/asm/i8259.h
@@ -7,7 +7,7 @@
extern void i8259_init(struct device_node *node, unsigned long intack_addr);
extern unsigned int i8259_irq(void);
-extern struct irq_domain *i8259_get_host(void);
+struct irq_domain *__init i8259_get_host(void);
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_I8259_H */
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index b11c0e2f9639..80b6d74146c6 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -3,20 +3,21 @@
#define _ASM_POWERPC_INST_H
#include <asm/ppc-opcode.h>
-
-#ifdef CONFIG_PPC64
+#include <asm/reg.h>
+#include <asm/disassemble.h>
+#include <asm/uaccess.h>
#define ___get_user_instr(gu_op, dest, ptr) \
({ \
long __gui_ret; \
u32 __user *__gui_ptr = (u32 __user *)ptr; \
- struct ppc_inst __gui_inst; \
+ ppc_inst_t __gui_inst; \
unsigned int __prefix, __suffix; \
\
__chk_user_ptr(ptr); \
__gui_ret = gu_op(__prefix, __gui_ptr); \
if (__gui_ret == 0) { \
- if ((__prefix >> 26) == OP_PREFIX) { \
+ if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \
__gui_ret = gu_op(__suffix, __gui_ptr + 1); \
__gui_inst = ppc_inst_prefix(__prefix, __suffix); \
} else { \
@@ -27,13 +28,6 @@
} \
__gui_ret; \
})
-#else /* !CONFIG_PPC64 */
-#define ___get_user_instr(gu_op, dest, ptr) \
-({ \
- __chk_user_ptr(ptr); \
- gu_op((dest).val, (u32 __user *)(ptr)); \
-})
-#endif /* CONFIG_PPC64 */
#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
@@ -43,44 +37,46 @@
* Instruction data type for POWER
*/
-struct ppc_inst {
- u32 val;
-#ifdef CONFIG_PPC64
- u32 suffix;
-#endif
-} __packed;
-
-static inline u32 ppc_inst_val(struct ppc_inst x)
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
+static inline u32 ppc_inst_val(ppc_inst_t x)
{
return x.val;
}
-static inline int ppc_inst_primary_opcode(struct ppc_inst x)
+#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
+
+#else
+static inline u32 ppc_inst_val(ppc_inst_t x)
{
- return ppc_inst_val(x) >> 26;
+ return x;
}
+#define ppc_inst(x) (x)
+#endif
-#define ppc_inst(x) ((struct ppc_inst){ .val = (x) })
+static inline int ppc_inst_primary_opcode(ppc_inst_t x)
+{
+ return ppc_inst_val(x) >> 26;
+}
#ifdef CONFIG_PPC64
-#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) })
+#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })
-static inline u32 ppc_inst_suffix(struct ppc_inst x)
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
return x.suffix;
}
#else
-#define ppc_inst_prefix(x, y) ppc_inst(x)
+#define ppc_inst_prefix(x, y) ((void)y, ppc_inst(x))
-static inline u32 ppc_inst_suffix(struct ppc_inst x)
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
return 0;
}
#endif /* CONFIG_PPC64 */
-static inline struct ppc_inst ppc_inst_read(const u32 *ptr)
+static inline ppc_inst_t ppc_inst_read(const u32 *ptr)
{
if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX)
return ppc_inst_prefix(*ptr, *(ptr + 1));
@@ -88,17 +84,17 @@ static inline struct ppc_inst ppc_inst_read(const u32 *ptr)
return ppc_inst(*ptr);
}
-static inline bool ppc_inst_prefixed(struct ppc_inst x)
+static inline bool ppc_inst_prefixed(ppc_inst_t x)
{
return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX;
}
-static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
+static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x)
{
return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x)));
}
-static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
+static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y)
{
if (ppc_inst_val(x) != ppc_inst_val(y))
return false;
@@ -107,7 +103,7 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
return ppc_inst_suffix(x) == ppc_inst_suffix(y);
}
-static inline int ppc_inst_len(struct ppc_inst x)
+static inline int ppc_inst_len(ppc_inst_t x)
{
return ppc_inst_prefixed(x) ? 8 : 4;
}
@@ -118,14 +114,14 @@ static inline int ppc_inst_len(struct ppc_inst x)
*/
static inline u32 *ppc_inst_next(u32 *location, u32 *value)
{
- struct ppc_inst tmp;
+ ppc_inst_t tmp;
tmp = ppc_inst_read(value);
return (void *)location + ppc_inst_len(tmp);
}
-static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
+static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x)
{
if (IS_ENABLED(CONFIG_PPC32))
return ppc_inst_val(x);
@@ -135,9 +131,17 @@ static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);
}
+static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x)
+{
+ if (!ppc_inst_prefixed(x))
+ *ptr = ppc_inst_val(x);
+ else
+ *(u64 *)ptr = ppc_inst_as_ulong(x);
+}
+
#define PPC_INST_STR_LEN sizeof("00000000 00000000")
-static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x)
+static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x)
{
if (ppc_inst_prefixed(x))
sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x));
@@ -154,6 +158,27 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins
__str; \
})
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src);
+static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
+{
+ unsigned int val, suffix;
+
+ if (unlikely(!is_kernel_addr((unsigned long)src)))
+ return -ERANGE;
+
+/* See https://github.com/ClangBuiltLinux/linux/issues/1521 */
+#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000
+ val = suffix = 0;
+#endif
+ __get_kernel_nofault(&val, src, u32, Efault);
+ if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
+ __get_kernel_nofault(&suffix, src + 1, u32, Efault);
+ *inst = ppc_inst_prefix(val, suffix);
+ } else {
+ *inst = ppc_inst(val);
+ }
+ return 0;
+Efault:
+ return -EFAULT;
+}
#endif /* _ASM_POWERPC_INST_H */
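
A standalone sketch (not the kernel implementation) of the prefixed-instruction convention this header encodes: a primary opcode of OP_PREFIX (1) means the instruction occupies two words, so reads pull in a suffix and the length becomes 8 instead of 4. The example words are illustrative.

/* Standalone model of ppc_inst_read()/ppc_inst_len() behaviour. */
#include <stdio.h>
#include <stdint.h>

#define OP_PREFIX 1

struct inst_model { uint32_t val, suffix; };

static struct inst_model inst_read(const uint32_t *ptr)
{
	struct inst_model x = { .val = ptr[0], .suffix = 0 };

	if ((ptr[0] >> 26) == OP_PREFIX)	/* prefixed: consume the next word too */
		x.suffix = ptr[1];
	return x;
}

static int inst_len(struct inst_model x)
{
	return (x.val >> 26) == OP_PREFIX ? 8 : 4;
}

int main(void)
{
	uint32_t code[] = { 0x06000000, 0x38600001,	/* illustrative prefixed pair (opcode 1 prefix) */
			    0x60000000 };		/* nop */
	struct inst_model a = inst_read(&code[0]);
	struct inst_model b = inst_read(&code[2]);

	printf("len(a)=%d len(b)=%d\n", inst_len(a), inst_len(b));	/* 8 4 */
	return 0;
}
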
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index a1d238255f07..fc28f46d2f9d 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -97,6 +97,11 @@ static inline void srr_regs_clobbered(void)
local_paca->hsrr_valid = 0;
}
#else
+static inline unsigned long search_kernel_restart_table(unsigned long addr)
+{
+ return 0;
+}
+
static inline bool is_implicit_soft_masked(struct pt_regs *regs)
{
return false;
@@ -139,39 +144,68 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
if (!arch_irq_disabled_regs(regs))
trace_hardirqs_off();
- if (user_mode(regs)) {
- kuep_lock();
- account_cpu_user_entry();
- } else {
+ if (user_mode(regs))
+ kuap_lock();
+ else
kuap_save_and_lock(regs);
- }
+
+ if (user_mode(regs))
+ account_cpu_user_entry();
#endif
#ifdef CONFIG_PPC64
- if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
+ bool trace_enable = false;
+
+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) {
+ if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
+ trace_enable = true;
+ } else {
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+ }
+
+ /*
+ * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
+ * Asynchronous interrupts get here with HARD_DIS set (see below), so
+ * this enables MSR[EE] for synchronous interrupts. IRQs remain
+ * soft-masked. The interrupt handler may later call
+ * interrupt_cond_local_irq_enable() to achieve a regular process
+ * context.
+ */
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(!(regs->msr & MSR_EE));
+ __hard_irq_enable();
+ } else {
+ __hard_RI_enable();
+ }
+
+ /* Do this when RI=1 because it can cause SLB faults */
+ if (trace_enable)
trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
if (user_mode(regs)) {
+ kuap_lock();
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
account_cpu_user_entry();
account_stolen_time();
} else {
+ kuap_save_and_lock(regs);
/*
* CT_WARN_ON comes here via program_check_exception,
* so avoid recursion.
*/
if (TRAP(regs) != INTERRUPT_PROGRAM) {
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
- BUG_ON(is_implicit_soft_masked(regs));
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(is_implicit_soft_masked(regs));
}
-#ifdef CONFIG_PPC_BOOK3S
+
/* Move this under a debugging check */
- if (arch_irq_disabled_regs(regs))
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) &&
+ arch_irq_disabled_regs(regs))
BUG_ON(search_kernel_restart_table(regs->nip));
-#endif
}
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
@@ -200,13 +234,20 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+#ifdef CONFIG_PPC64
+ /* Ensure interrupt_enter_prepare does not enable MSR[EE] */
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+#endif
+ interrupt_enter_prepare(regs, state);
#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * RI=1 is set by interrupt_enter_prepare, so this thread flags access
+ * has to come afterward (it can cause SLB faults).
+ */
if (cpu_has_feature(CPU_FTR_CTRL) &&
!test_thread_local_flags(_TLF_RUNLATCH))
__ppc64_runlatch_on();
#endif
-
- interrupt_enter_prepare(regs, state);
irq_enter();
}
@@ -276,6 +317,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
regs->softe = IRQS_ALL_DISABLED;
}
+ __hard_RI_enable();
+
/* Don't do any per-CPU operations until interrupt state is fixed */
if (nmi_disables_ftrace(regs)) {
@@ -373,6 +416,8 @@ interrupt_handler long func(struct pt_regs *regs) \
{ \
long ret; \
\
+ __hard_RI_enable(); \
+ \
ret = ____##func (regs); \
\
return ret; \
@@ -564,7 +609,7 @@ DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
/* slb.c */
DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
-DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt);
/* hash_utils.c */
DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index c361212ac160..d7912b66c874 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -275,7 +275,7 @@ extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs);
-extern void iommu_init_early_pSeries(void);
+void __init iommu_init_early_pSeries(void);
extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
extern void iommu_init_early_pasemi(void);
diff --git a/arch/powerpc/include/asm/ipic.h b/arch/powerpc/include/asm/ipic.h
index 0524df31a7e6..b47ca7dc7199 100644
--- a/arch/powerpc/include/asm/ipic.h
+++ b/arch/powerpc/include/asm/ipic.h
@@ -65,7 +65,7 @@ enum ipic_mcp_irq {
IPIC_MCP_MU = 7,
};
-extern void ipic_set_default_priority(void);
+void __init ipic_set_default_priority(void);
extern u32 ipic_get_mcp_status(void);
extern void ipic_clear_mcp_status(u32 mask);
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 2b3278534bc1..13f0409dd617 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -36,7 +36,7 @@ extern int distribute_irqs;
struct pt_regs;
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
/*
* Per-cpu stacks for handling critical, debug and machine check
* level interrupts.
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index c6f250eca3fb..8ebdd23d987c 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -84,7 +84,7 @@ extern int crash_shutdown_register(crash_shutdown_t handler);
extern int crash_shutdown_unregister(crash_shutdown_t handler);
extern void crash_kexec_secondary(struct pt_regs *regs);
-extern int overlaps_crashkernel(unsigned long start, unsigned long size);
+int __init overlaps_crashkernel(unsigned long start, unsigned long size);
extern void reserve_crashkernel(void);
extern void machine_kexec_mask_interrupts(void);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 1df763002726..fb2237809d63 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -14,6 +14,10 @@
#include <asm/nohash/32/kup-8xx.h>
#endif
+#ifdef CONFIG_BOOKE_OR_40x
+#include <asm/nohash/kup-booke.h>
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_32
#include <asm/book3s/32/kup.h>
#endif
@@ -32,34 +36,29 @@ extern bool disable_kuap;
#include <linux/pgtable.h>
-#ifdef CONFIG_PPC_KUEP
+void setup_kup(void);
void setup_kuep(bool disabled);
-#else
-static inline void setup_kuep(bool disabled) { }
-#endif /* CONFIG_PPC_KUEP */
-
-#ifndef CONFIG_PPC_BOOK3S_32
-static inline void kuep_lock(void) { }
-static inline void kuep_unlock(void) { }
-#endif
#ifdef CONFIG_PPC_KUAP
void setup_kuap(bool disabled);
#else
static inline void setup_kuap(bool disabled) { }
+static __always_inline bool kuap_is_disabled(void) { return true; }
+
static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return false;
}
-static inline void kuap_assert_locked(void) { }
-static inline void kuap_save_and_lock(struct pt_regs *regs) { }
+static inline void __kuap_assert_locked(void) { }
+static inline void __kuap_lock(void) { }
+static inline void __kuap_save_and_lock(struct pt_regs *regs) { }
static inline void kuap_user_restore(struct pt_regs *regs) { }
-static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
+static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
-static inline unsigned long kuap_get_and_assert_locked(void)
+static inline unsigned long __kuap_get_and_assert_locked(void)
{
return 0;
}
@@ -70,20 +69,99 @@ static inline unsigned long kuap_get_and_assert_locked(void)
* platforms.
*/
#ifndef CONFIG_PPC_BOOK3S_64
-static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir) { }
-static inline void prevent_user_access(unsigned long dir) { }
-static inline unsigned long prevent_user_access_return(void) { return 0UL; }
-static inline void restore_user_access(unsigned long flags) { }
+static inline void __allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir) { }
+static inline void __prevent_user_access(unsigned long dir) { }
+static inline unsigned long __prevent_user_access_return(void) { return 0UL; }
+static inline void __restore_user_access(unsigned long flags) { }
#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* CONFIG_PPC_KUAP */
-static __always_inline void setup_kup(void)
+static __always_inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ if (kuap_is_disabled())
+ return false;
+
+ return __bad_kuap_fault(regs, address, is_write);
+}
+
+static __always_inline void kuap_assert_locked(void)
{
- setup_kuep(disable_kuep);
- setup_kuap(disable_kuap);
+ if (kuap_is_disabled())
+ return;
+
+ if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ __kuap_get_and_assert_locked();
+}
+
+static __always_inline void kuap_lock(void)
+{
+ if (kuap_is_disabled())
+ return;
+
+ __kuap_lock();
+}
+
+static __always_inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+ if (kuap_is_disabled())
+ return;
+
+ __kuap_save_and_lock(regs);
}
+static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
+{
+ if (kuap_is_disabled())
+ return;
+
+ __kuap_kernel_restore(regs, amr);
+}
+
+static __always_inline unsigned long kuap_get_and_assert_locked(void)
+{
+ if (kuap_is_disabled())
+ return 0;
+
+ return __kuap_get_and_assert_locked();
+}
+
+#ifndef CONFIG_PPC_BOOK3S_64
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
+{
+ if (kuap_is_disabled())
+ return;
+
+ __allow_user_access(to, from, size, dir);
+}
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+ if (kuap_is_disabled())
+ return;
+
+ __prevent_user_access(dir);
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+ if (kuap_is_disabled())
+ return 0;
+
+ return __prevent_user_access_return();
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+ if (kuap_is_disabled())
+ return;
+
+ __restore_user_access(flags);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
{
barrier_nospec();
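
The restructuring above funnels every generic kuap_*() entry point through a single kuap_is_disabled() check before reaching the platform's __kuap_*() hook. A standalone sketch of that layering (a plain bool stands in for the disable_kuap_key static key):

/* Standalone model of the wrapper/hook split introduced in kup.h. */
#include <stdbool.h>
#include <stdio.h>

static bool disable_kuap;	/* stands in for the disable_kuap_key static key */

static bool kuap_is_disabled(void) { return disable_kuap; }

static void __kuap_lock(void) { puts("platform __kuap_lock()"); }

static void kuap_lock(void)
{
	if (kuap_is_disabled())
		return;
	__kuap_lock();
}

int main(void)
{
	kuap_lock();		/* dispatches to the platform hook */
	disable_kuap = true;
	kuap_lock();		/* no-op once KUAP is disabled */
	return 0;
}
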
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index fbbf3cec92e9..d68d71987d5c 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -79,6 +79,7 @@
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
#define BOOK3S_INTERRUPT_DECREMENTER 0x900
#define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980
+#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980
#define BOOK3S_INTERRUPT_DOORBELL 0xa00
#define BOOK3S_INTERRUPT_SYSCALL 0xc00
#define BOOK3S_INTERRUPT_TRACE 0xd00
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 3d31f2c59e43..91c9f937edcd 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
return vcpu->arch.fault_dar;
}
+/* Expiry time of vcpu DEC relative to host TB */
+static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
+}
+
static inline bool is_kvmppc_resume_guest(int r)
{
return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index fff391b9b97b..fe07558173ef 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -44,7 +44,6 @@ struct kvm_nested_guest {
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
cpumask_t need_tlb_flush;
- cpumask_t cpu_in_guest;
short prev_cpu[NR_CPUS];
u8 radix; /* is this nested guest radix */
};
@@ -154,7 +153,9 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
-int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb);
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
index c63105d2c9e7..68e499abdb24 100644
--- a/arch/powerpc/include/asm/kvm_guest.h
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -16,7 +16,7 @@ static inline bool is_kvm_guest(void)
return static_branch_unlikely(&kvm_guest);
}
-int check_kvm_guest(void);
+int __init check_kvm_guest(void);
#else
static inline bool is_kvm_guest(void) { return false; }
static inline int check_kvm_guest(void) { return 0; }
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e4d23193eba7..17263276189e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -287,7 +287,6 @@ struct kvm_arch {
u32 online_vcores;
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
- cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
u8 secure_guest;
@@ -579,6 +578,10 @@ struct kvm_vcpu_arch {
ulong cfar;
ulong ppr;
u32 pspb;
+ u8 load_ebb;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ u8 load_tm;
+#endif
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
@@ -741,7 +744,7 @@ struct kvm_vcpu_arch {
struct hrtimer dec_timer;
u64 dec_jiffies;
- u64 dec_expires;
+ u64 dec_expires; /* Relative to guest timebase. */
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 671fbd1a765e..33db83b82fbd 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -552,8 +552,7 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);
-extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
- struct kvm_nested_guest *nested);
+extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu);
#else
static inline void __init kvm_cma_reserve(void)
@@ -760,6 +759,7 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
void kvmppc_subcore_exit_guest(void);
long kvmppc_realmode_hmi_handler(void);
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu);
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 9c3c9f04129f..e821037f74f0 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -235,8 +235,6 @@ extern struct machdep_calls *machine_id;
machine_id == &mach_##name; \
})
-extern void probe_machine(void);
-
#ifdef CONFIG_PPC_PMAC
/*
* Power macintoshes have either a CUDA, PMU or SMU controlling
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 8abe8e42e045..5f41565a1e5d 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -157,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx);
enum {
MMU_FTRS_POSSIBLE =
-#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604)
+#if defined(CONFIG_PPC_BOOK3S_604)
MMU_FTR_HPTE_TABLE |
#endif
#ifdef CONFIG_PPC_8xx
@@ -184,15 +184,18 @@ enum {
MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS |
#endif
#ifdef CONFIG_PPC_BOOK3S_64
+ MMU_FTR_KERNEL_RO |
+#ifdef CONFIG_PPC_64S_HASH_MMU
MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
- MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA |
+ MMU_FTR_68_BIT_VA | MMU_FTR_HPTE_TABLE |
#endif
#ifdef CONFIG_PPC_RADIX_MMU
MMU_FTR_TYPE_RADIX |
MMU_FTR_GTSE |
#endif /* CONFIG_PPC_RADIX_MMU */
+#endif
#ifdef CONFIG_PPC_KUAP
MMU_FTR_BOOK3S_KUAP |
#endif /* CONFIG_PPC_KUAP */
@@ -224,6 +227,13 @@ enum {
#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E
#endif
+/* BOOK3S_64 options */
+#if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU)
+#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_RADIX
+#elif !defined(CONFIG_PPC_RADIX_MMU) && defined(CONFIG_PPC_64S_HASH_MMU)
+#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE
+#endif
+
#ifndef MMU_FTRS_ALWAYS
#define MMU_FTRS_ALWAYS 0
#endif
@@ -329,7 +339,7 @@ static __always_inline bool radix_enabled(void)
return mmu_has_feature(MMU_FTR_TYPE_RADIX);
}
-static inline bool early_radix_enabled(void)
+static __always_inline bool early_radix_enabled(void)
{
return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
}
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 9ba6b585337f..fd277b15635c 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -71,10 +71,11 @@ static inline void switch_mmu_context(struct mm_struct *prev,
}
extern int hash__alloc_context_id(void);
-extern void hash__reserve_context_id(int id);
+void __init hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
+#ifdef CONFIG_PPC_64S_HASH_MMU
static inline int alloc_extended_context(struct mm_struct *mm,
unsigned long ea)
{
@@ -100,6 +101,7 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
return true;
return false;
}
+#endif
#else
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 0abf2e7fd222..58353c5bd3fb 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -472,7 +472,7 @@ extern int mpic_cpu_get_priority(void);
extern void mpic_cpu_set_priority(int prio);
/* Request IPIs on primary mpic */
-extern void mpic_request_ipis(void);
+void __init mpic_request_ipis(void);
/* Send a message (IPI) to a given target (cpu number or MSG_*) */
void smp_mpic_message_pass(int target, int msg);
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 882a0bc7887a..c44d97751723 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -20,11 +20,12 @@ static __always_inline bool kuap_is_disabled(void)
return static_branch_unlikely(&disable_kuap_key);
}
-static inline void kuap_save_and_lock(struct pt_regs *regs)
+static inline void __kuap_lock(void)
{
- if (kuap_is_disabled())
- return;
+}
+static inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
regs->kuap = mfspr(SPRN_MD_AP);
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
@@ -33,21 +34,15 @@ static inline void kuap_user_restore(struct pt_regs *regs)
{
}
-static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
{
- if (kuap_is_disabled())
- return;
-
mtspr(SPRN_MD_AP, regs->kuap);
}
-static inline unsigned long kuap_get_and_assert_locked(void)
+static inline unsigned long __kuap_get_and_assert_locked(void)
{
unsigned long kuap;
- if (kuap_is_disabled())
- return MD_APG_INIT;
-
kuap = mfspr(SPRN_MD_AP);
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
@@ -56,36 +51,21 @@ static inline unsigned long kuap_get_and_assert_locked(void)
return kuap;
}
-static inline void kuap_assert_locked(void)
-{
- if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled())
- kuap_get_and_assert_locked();
-}
-
-static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir)
+static inline void __allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
{
- if (kuap_is_disabled())
- return;
-
mtspr(SPRN_MD_AP, MD_APG_INIT);
}
-static inline void prevent_user_access(unsigned long dir)
+static inline void __prevent_user_access(unsigned long dir)
{
- if (kuap_is_disabled())
- return;
-
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
-static inline unsigned long prevent_user_access_return(void)
+static inline unsigned long __prevent_user_access_return(void)
{
unsigned long flags;
- if (kuap_is_disabled())
- return MD_APG_INIT;
-
flags = mfspr(SPRN_MD_AP);
mtspr(SPRN_MD_AP, MD_APG_KUAP);
@@ -93,20 +73,14 @@ static inline unsigned long prevent_user_access_return(void)
return flags;
}
-static inline void restore_user_access(unsigned long flags)
+static inline void __restore_user_access(unsigned long flags)
{
- if (kuap_is_disabled())
- return;
-
mtspr(SPRN_MD_AP, flags);
}
static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
- if (kuap_is_disabled())
- return false;
-
return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000);
}
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
index 43ceca128531..2d92a39d8f2e 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
@@ -113,7 +113,6 @@ typedef struct {
/* patch sites */
extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I;
-extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep;
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 997cec973406..0e93a4728c9e 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -39,12 +39,10 @@
* 0 => Kernel => 11 (all accesses performed according as user iaw page definition)
* 1 => Kernel+Accessed => 01 (all accesses performed according to page definition)
* 2 => User => 11 (all accesses performed according as user iaw page definition)
- * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT
- * => 10 (all accesses performed according to swaped page definition) for KUEP
+ * 3 => User+Accessed => 10 (all accesses performed according to swapped page definition) for KUEP
* 4-15 => Not Used
*/
-#define MI_APG_INIT 0xdc000000
-#define MI_APG_KUEP 0xde000000
+#define MI_APG_INIT 0xde000000
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 9d2905a47410..a3313e853e5e 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -313,6 +313,12 @@ extern int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long phys);
extern void vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
+void __patch_exception(int exc, unsigned long addr);
+#define patch_exception(exc, name) do { \
+ extern unsigned int name; \
+ __patch_exception((exc), (unsigned long)&name); \
+} while (0)
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h
new file mode 100644
index 000000000000..49bb41ed0816
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/kup-booke.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_BOOKE_H_
+#define _ASM_POWERPC_KUP_BOOKE_H_
+
+#include <asm/bug.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_check_amr gpr1, gpr2
+.endm
+
+#else
+
+#include <linux/jump_label.h>
+#include <linux/sched.h>
+
+#include <asm/reg.h>
+
+extern struct static_key_false disable_kuap_key;
+
+static __always_inline bool kuap_is_disabled(void)
+{
+ return static_branch_unlikely(&disable_kuap_key);
+}
+
+static inline void __kuap_lock(void)
+{
+ mtspr(SPRN_PID, 0);
+ isync();
+}
+
+static inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+ regs->kuap = mfspr(SPRN_PID);
+ mtspr(SPRN_PID, 0);
+ isync();
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+ if (kuap_is_disabled())
+ return;
+
+ mtspr(SPRN_PID, current->thread.pid);
+
+ /* Context synchronisation is performed by rfi */
+}
+
+static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+ if (regs->kuap)
+ mtspr(SPRN_PID, current->thread.pid);
+
+ /* Context synchronisation is performed by rfi */
+}
+
+static inline unsigned long __kuap_get_and_assert_locked(void)
+{
+ unsigned long kuap = mfspr(SPRN_PID);
+
+ if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ WARN_ON_ONCE(kuap);
+
+ return kuap;
+}
+
+static inline void __allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
+{
+ mtspr(SPRN_PID, current->thread.pid);
+ isync();
+}
+
+static inline void __prevent_user_access(unsigned long dir)
+{
+ mtspr(SPRN_PID, 0);
+ isync();
+}
+
+static inline unsigned long __prevent_user_access_return(void)
+{
+ unsigned long flags = mfspr(SPRN_PID);
+
+ mtspr(SPRN_PID, 0);
+ isync();
+
+ return flags;
+}
+
+static inline void __restore_user_access(unsigned long flags)
+{
+ if (flags) {
+ mtspr(SPRN_PID, current->thread.pid);
+ isync();
+ }
+}
+
+static inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return !regs->kuap;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_BOOKE_H_ */
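
A standalone sketch of the KUAP scheme this new header implements, as far as the diff shows it: user access is open while SPRN_PID holds the current task's PID and locked while it holds 0 (plain variables stand in for the SPR and for current->thread.pid):

/* Standalone model of the PID-based lock/unlock idea in kup-booke.h. */
#include <stdio.h>

static unsigned long spr_pid;			/* stands in for SPRN_PID */
static unsigned long current_pid = 42;		/* stands in for current->thread.pid */

static void prevent_user_access(void) { spr_pid = 0; }			/* models __prevent_user_access() */
static void allow_user_access(void)   { spr_pid = current_pid; }	/* models __allow_user_access() */
static int  user_access_open(void)    { return spr_pid == current_pid; }

int main(void)
{
	prevent_user_access();
	printf("locked: open=%d\n", user_access_open());	/* 0 */
	allow_user_access();
	printf("open:   open=%d\n", user_access_open());	/* 1 */
	return 0;
}
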
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 0b63ba7d5917..a2bc4b95e703 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1094,6 +1094,7 @@ enum {
OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */
OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */
OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */
+ OPAL_XIVE_IRQ_STORE_EOI2 = 0x00000040,
};
/* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 6ea9001de9a9..bfd3142cd0ba 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -314,7 +314,7 @@ extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
int depth, void *data);
extern int early_init_dt_scan_recoverable_ranges(unsigned long node,
const char *uname, int depth, void *data);
-extern void opal_configure_cores(void);
+void __init opal_configure_cores(void);
extern int opal_get_chars(uint32_t vtermno, char *buf, int count);
extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index dc05a862e72a..295573a82c66 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -97,7 +97,9 @@ struct paca_struct {
/* this becomes non-zero. */
u8 kexec_state; /* set when kexec down has irqs off */
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct slb_shadow *slb_shadow_ptr;
+#endif
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;
#endif
@@ -110,6 +112,7 @@ struct paca_struct {
/* used for most interrupts/exceptions */
u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* SLB related definitions */
u16 vmalloc_sllp;
u8 slb_cache_ptr;
@@ -120,6 +123,7 @@ struct paca_struct {
u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */
u32 slb_kern_bitmap;
u32 slb_cache[SLB_CACHE_ENTRIES];
+#endif
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_BOOK3E
@@ -149,6 +153,7 @@ struct paca_struct {
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_64S_HASH_MMU
#ifdef CONFIG_PPC_MM_SLICES
unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
@@ -157,6 +162,7 @@ struct paca_struct {
u16 mm_ctx_sllp;
#endif
#endif
+#endif
/*
* then miscellaneous read-write fields
@@ -268,9 +274,11 @@ struct paca_struct {
#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Capture SLB related old contents in MCE handler. */
struct slb_entry *mce_faulty_slbs;
u16 slb_save_cache_ptr;
+#endif
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_STACKPROTECTOR
unsigned long canary;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index d1f53260725c..915d6ee4b40a 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -48,7 +48,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
}
#ifdef CONFIG_PCI
-extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops);
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops);
#else /* CONFIG_PCI */
#define set_pci_dma_ops(d)
#endif
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index f4c3428e816b..e2221d29fdf9 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -98,7 +98,7 @@ struct power_pmu {
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
-extern int register_power_pmu(struct power_pmu *);
+int __init register_power_pmu(struct power_pmu *pmu);
struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index baea657bc868..efad07081cc0 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -249,6 +249,7 @@
#define PPC_INST_COPY 0x7c20060c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
+#define PPC_INST_DSSALL 0x7e00066c
#define PPC_INST_ISEL 0x7c00001e
#define PPC_INST_ISEL_MASK 0xfc00003e
#define PPC_INST_LSWI 0x7c0004aa
@@ -393,6 +394,7 @@
(0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
#define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \
(0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
+#define PPC_RAW_TLBIEL_v205(rb, l) (0x7c000224 | ___PPC_RB(rb) | (l << 21))
#define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b))
#define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | __PPC_RB(b))
#define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
@@ -566,6 +568,8 @@
#define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
#define PPC_RAW_EIEIO() (0x7c0006ac)
+#define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fffffc))
+
/* Deal with instructions that older assemblers aren't aware of */
#define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
#define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT)
@@ -575,6 +579,7 @@
#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b))
#define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b))
#define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b))
+#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL)
#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh))
#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b))
#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c))
@@ -602,6 +607,7 @@
stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r))
#define PPC_TLBIEL(rb,rs,ric,prs,r) \
stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r))
+#define PPC_TLBIEL_v205(rb, l) stringify_in_c(.long PPC_RAW_TLBIEL_v205(rb, l))
#define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b))
#define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b))
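
PPC_RAW_BRANCH() added above just masks a signed 26-bit displacement into the LI field of the unconditional-branch opcode. A standalone check of that arithmetic (PPC_INST_BRANCH is 0x48000000, as defined elsewhere in this header):

/* Standalone model of PPC_RAW_BRANCH(): opcode | (displacement & 0x03fffffc). */
#include <stdio.h>

#define PPC_INST_BRANCH		0x48000000u
#define PPC_RAW_BRANCH(addr)	(PPC_INST_BRANCH | ((addr) & 0x03fffffc))

int main(void)
{
	printf("0x%08x\n", PPC_RAW_BRANCH(0x10));	/* 0x48000010: b +0x10 */
	printf("0x%08x\n", PPC_RAW_BRANCH(-0x20));	/* 0x4bffffe0: b -0x20 (sign bits fold into LI) */
	return 0;
}
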
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 7be24048b8d1..f21e6bde17a1 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -17,29 +17,40 @@
#define SZL (BITS_PER_LONG/8)
/*
+ * This expands to a sequence of operations with reg incrementing from
+ * start to end inclusive, of this form:
+ *
+ * op reg, (offset + (width * reg))(base)
+ *
+ * Note that offset is not the offset of the first operation unless start
+ * is zero (or width is zero).
+ */
+.macro OP_REGS op, width, start, end, base, offset
+ .Lreg=\start
+ .rept (\end - \start + 1)
+ \op .Lreg, \offset + \width * .Lreg(\base)
+ .Lreg=.Lreg+1
+ .endr
+.endm
+
+/*
* Macros for storing registers into and loading registers from
* exception frames.
*/
#ifdef __powerpc64__
-#define SAVE_GPR(n, base) std n,GPR0+8*(n)(base)
-#define REST_GPR(n, base) ld n,GPR0+8*(n)(base)
-#define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base)
-#define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base)
+#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0
+#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0
+#define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base)
+#define REST_NVGPRS(base) REST_GPRS(14, 31, base)
#else
-#define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base)
-#define REST_GPR(n, base) lwz n,GPR0+4*(n)(base)
-#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); SAVE_10GPRS(22, base)
-#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); REST_10GPRS(22, base)
+#define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0
+#define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0
+#define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base)
+#define REST_NVGPRS(base) REST_GPRS(13, 31, base)
#endif
-#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
-#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
-#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
-#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
-#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base)
-#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base)
-#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
-#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
+#define REST_GPR(n, base) REST_GPRS(n, n, base)
#define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base)
#define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base)
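
The OP_REGS macro introduced above replaces the old SAVE_2GPRS/SAVE_4GPRS/... ladder with a single .rept loop, so SAVE_GPRS(3, 8, r1) emits one store per register at offset GPR0 + 8*reg (4*reg on 32-bit). A small host-side C sketch, not kernel code, that mirrors the emitted sequence for the 64-bit case:

#include <stdio.h>

static void emit_save_gprs(int start, int end, int width)
{
	for (int reg = start; reg <= end; reg++)
		printf("std %d, GPR0+%d(r1)\n", reg, width * reg);
}

/* emit_save_gprs(3, 8, 8) prints the six stores SAVE_GPRS(3, 8, r1) expands to */
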
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e39bd0ff69f3..2c8686d9e964 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -157,8 +157,12 @@ struct thread_struct {
#ifdef CONFIG_PPC_BOOK3S_32
unsigned long r0, r3, r4, r5, r6, r8, r9, r11;
unsigned long lr, ctr;
+ unsigned long sr0;
#endif
#endif /* CONFIG_PPC32 */
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ unsigned long pid; /* value written in PID reg. at interrupt exit */
+#endif
/* Debug Registers */
struct debug_reg debug;
#ifdef CONFIG_PPC_FPU_REGS
@@ -191,8 +195,10 @@ struct thread_struct {
int used_vsr; /* set if process has used VSX */
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
- unsigned long evr[32]; /* upper 32-bits of SPE regs */
- u64 acc; /* Accumulator */
+ struct_group(spe,
+ unsigned long evr[32]; /* upper 32-bits of SPE regs */
+ u64 acc; /* Accumulator */
+ );
unsigned long spefscr; /* SPE & eFP status */
unsigned long spefscr_last; /* SPEFSCR value on last prctl
call or trap return */
@@ -276,6 +282,12 @@ struct thread_struct {
#define SPEFSCR_INIT
#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+#define SR0_INIT .sr0 = IS_ENABLED(CONFIG_PPC_KUEP) ? SR_NX : 0,
+#else
+#define SR0_INIT
+#endif
+
#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
#define INIT_THREAD { \
.ksp = INIT_SP, \
@@ -283,6 +295,7 @@ struct thread_struct {
.kuap = ~0UL, /* KUAP_NONE */ \
.fpexc_mode = MSR_FE0 | MSR_FE1, \
SPEFSCR_INIT \
+ SR0_INIT \
}
#elif defined(CONFIG_PPC32)
#define INIT_THREAD { \
@@ -290,6 +303,7 @@ struct thread_struct {
.pgdir = swapper_pg_dir, \
.fpexc_mode = MSR_FE0 | MSR_FE1, \
SPEFSCR_INIT \
+ SR0_INIT \
}
#else
#define INIT_THREAD { \
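
The struct_group() wrapper above gives the SPE save area (evr[] plus acc) a single name, presumably so it can be copied as one region under FORTIFY-style memcpy() bounds checking. A one-line sketch of the kind of caller this enables, with dst/src assumed to be task_struct pointers:

	memcpy(&dst->thread.spe, &src->thread.spe, sizeof(dst->thread.spe));
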
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 6e560f035614..42f89e2d8f04 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -291,7 +291,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
static inline bool cpu_has_msr_ri(void)
{
- return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x);
+ return !IS_ENABLED(CONFIG_BOOKE_OR_40x);
}
static inline bool regs_is_unrecoverable(struct pt_regs *regs)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e9d27265253b..2835f6363228 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -18,9 +18,9 @@
#include <asm/feature-fixups.h>
/* Pickup Book E specific registers. */
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
#include <asm/reg_booke.h>
-#endif /* CONFIG_BOOKE || CONFIG_40x */
+#endif
#ifdef CONFIG_FSL_EMB_PERFMON
#include <asm/reg_fsl_emb.h>
@@ -1366,6 +1366,18 @@
/* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__
+
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
+typedef struct {
+ u32 val;
+#ifdef CONFIG_PPC64
+ u32 suffix;
+#endif
+} __packed ppc_inst_t;
+#else
+typedef u32 ppc_inst_t;
+#endif
+
#define mfmsr() ({unsigned long rval; \
asm volatile("mfmsr %0" : "=r" (rval) : \
: "memory"); rval;})
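
ppc_inst_t, added above, is a plain u32 on 32-bit and a packed two-word struct on 64-bit so it can also carry the suffix of a prefixed instruction. A short sketch of how it is meant to be handled, assuming the usual ppc_inst()/ppc_inst_val() accessors that live alongside this typedef:

	ppc_inst_t nop = ppc_inst(PPC_RAW_NOP());	/* 0x60000000 */
	u32 word = ppc_inst_val(nop);			/* primary opcode word */
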
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9dc97d2f9d27..82e5b055fa2a 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -264,7 +264,7 @@ extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
extern unsigned int rtas_busy_delay_time(int status);
-extern unsigned int rtas_busy_delay(int status);
+bool rtas_busy_delay(int status);
extern int early_init_dt_scan_rtas(unsigned long node,
const char *uname, int depth, void *data);
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 79cb7a25a5fb..38f79e42bf3c 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -25,16 +25,16 @@ extern char start_virt_trampolines[];
extern char end_virt_trampolines[];
#endif
+/*
+ * This assumes the kernel is never compiled -mcmodel=small or
+ * the total .toc is always less than 64k.
+ */
static inline unsigned long kernel_toc_addr(void)
{
- /* Defined by the linker, see vmlinux.lds.S */
- extern unsigned long __toc_start;
-
- /*
- * The TOC register (r2) points 32kB into the TOC, so that 64kB of
- * the TOC can be addressed using a single machine instruction.
- */
- return (unsigned long)(&__toc_start) + 0x8000UL;
+ unsigned long toc_ptr;
+
+ asm volatile("mr %0, 2" : "=r" (toc_ptr));
+ return toc_ptr;
}
static inline int overlaps_interrupt_vector_text(unsigned long start,
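
kernel_toc_addr() now reads the live TOC pointer straight out of r2 instead of deriving it from __toc_start, relying on the assumptions recorded in the new comment. A hypothetical caller, e.g. checking whether a saved GPR2 value belongs to the core kernel:

static bool gpr2_is_kernel_toc(unsigned long gpr2)
{
	return gpr2 == kernel_toc_addr();
}
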
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 6c1a7d217d1a..d0d3dd531c7f 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -9,7 +9,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex);
extern unsigned int rtas_data;
extern unsigned long long memory_limit;
-extern bool init_mem_is_free;
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
struct device_node;
@@ -32,7 +31,7 @@ void setup_panic(void);
extern bool pseries_enable_reloc_on_exc(void);
extern void pseries_disable_reloc_on_exc(void);
extern void pseries_big_endian_exceptions(void);
-extern void pseries_little_endian_exceptions(void);
+void __init pseries_little_endian_exceptions(void);
#else
static inline bool pseries_enable_reloc_on_exc(void) { return false; }
static inline void pseries_disable_reloc_on_exc(void) {}
@@ -55,7 +54,7 @@ void setup_entry_flush(bool enable);
void setup_uaccess_flush(bool enable);
void do_rfi_flush_fixups(enum l1d_flush_type types);
#ifdef CONFIG_PPC_BARRIER_NOSPEC
-void setup_barrier_nospec(void);
+void __init setup_barrier_nospec(void);
#else
static inline void setup_barrier_nospec(void) { }
#endif
@@ -71,11 +70,11 @@ static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
-void setup_spectre_v2(void);
+void __init setup_spectre_v2(void);
#else
static inline void setup_spectre_v2(void) {}
#endif
-void do_btb_flush_fixups(void);
+void __init do_btb_flush_fixups(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 4b30a0205c93..2ac6ab903023 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -456,7 +456,7 @@ extern void smu_poll(void);
/*
* Init routine, presence check....
*/
-extern int smu_init(void);
+int __init smu_init(void);
extern int smu_present(void);
struct platform_device;
extern struct platform_device *smu_get_ofdev(void);
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 1df867c2e054..50950deedb87 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -145,7 +145,7 @@ union vsx_reg {
* otherwise.
*/
extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
- struct ppc_inst instr);
+ ppc_inst_t instr);
/*
* Emulate an instruction that can be executed just by updating
@@ -162,7 +162,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op);
* 0 if it could not be emulated, or -1 for an instruction that
* should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.).
*/
-extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr);
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr);
/*
* Emulate a load or store instruction by reading/writing the
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 9d1fbd8be1c7..1f43ef696033 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -112,6 +112,9 @@ static inline void clear_task_ebb(struct task_struct *t)
#endif
}
+void kvmppc_save_user_regs(void);
+void kvmppc_save_current_sprs(void);
+
extern int set_thread_tidr(struct task_struct *t);
#endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
index c993482237ed..38fdf8041d12 100644
--- a/arch/powerpc/include/asm/task_size_64.h
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -44,11 +44,7 @@
*/
#define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE))
-#define TASK_SIZE_OF(tsk) \
- (test_tsk_thread_flag(tsk, TIF_32BIT) ? TASK_SIZE_USER32 : \
- TASK_SIZE_USER64)
-
-#define TASK_SIZE TASK_SIZE_OF(current)
+#define TASK_SIZE (is_32bit_task() ? TASK_SIZE_USER32 : TASK_SIZE_USER64)
#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8c2c3dd4ddba..924b2157882f 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,8 @@
#include <asm/vdso/timebase.h>
/* time.c */
+extern u64 decrementer_max;
+
extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
@@ -97,19 +99,16 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
extern void secondary_cpu_time_init(void);
extern void __init time_init(void);
-#ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
- unsigned long x;
+DECLARE_PER_CPU(u64, decrementers_next_tb);
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, irq_work_pending)));
- return x;
+static inline u64 timer_get_next_tb(void)
+{
+ return __this_cpu_read(decrementers_next_tb);
}
-#endif
-DECLARE_PER_CPU(u64, decrementers_next_tb);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now);
+#endif
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
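
timer_get_next_tb(), added above, just reads this CPU's decrementers_next_tb, i.e. the timebase value at which the next decrementer interrupt is due. A hypothetical use, for example when judging how deep an idle state may be entered:

static bool next_timer_after(u64 wakeup_tb)
{
	return timer_get_next_tb() > wakeup_tb;
}
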
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 0ea9e70ed78b..b4aa0d88ce2c 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -23,14 +23,14 @@ extern void udbg_printf(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
extern void udbg_progress(char *s, unsigned short hex);
-extern void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride);
-extern void udbg_uart_init_pio(unsigned long port, unsigned int stride);
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride);
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride);
-extern void udbg_uart_setup(unsigned int speed, unsigned int clock);
-extern unsigned int udbg_probe_uart_speed(unsigned int clock);
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock);
+unsigned int __init udbg_probe_uart_speed(unsigned int clock);
struct device_node;
-extern void udbg_scc_init(int force_scc);
+void __init udbg_scc_init(int force_scc);
extern int udbg_adb_init(int force_btext);
extern void udbg_adb_init_early(void);
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index fe683371336f..a7ae1860115a 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -11,7 +11,6 @@
#include <linux/notifier.h>
#include <asm/probes.h>
-#include <asm/inst.h>
typedef ppc_opcode_t uprobe_opcode_t;
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0ac9bfddf704..e2e704eca5f6 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -38,13 +38,13 @@ static inline int icp_native_init(void) { return -ENODEV; }
/* PAPR ICP */
#ifdef CONFIG_PPC_ICP_HV
-extern int icp_hv_init(void);
+int __init icp_hv_init(void);
#else
static inline int icp_hv_init(void) { return -ENODEV; }
#endif
#ifdef CONFIG_PPC_POWERNV
-extern int icp_opal_init(void);
+int __init icp_opal_init(void);
extern void icp_opal_flush_interrupt(void);
#else
static inline int icp_opal_init(void) { return -ENODEV; }
diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h
index 68bfb2361f03..f2d44b44f46c 100644
--- a/arch/powerpc/include/asm/xmon.h
+++ b/arch/powerpc/include/asm/xmon.h
@@ -12,7 +12,7 @@
#ifdef CONFIG_XMON
extern void xmon_setup(void);
-extern void xmon_register_spus(struct list_head *list);
+void __init xmon_register_spus(struct list_head *list);
struct pt_regs;
extern int xmon(struct pt_regs *excp);
extern irqreturn_t xmon_irq(int, void *);
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b039877c743d..4d7829399570 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index bf96b954a4eb..3e37ece06739 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = {
* so we don't need the address swizzling.
*/
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
- struct ppc_inst ppc_instr)
+ ppc_inst_t ppc_instr)
{
union {
u64 ll;
@@ -300,7 +300,7 @@ Efault_write:
int fix_alignment(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct instruction_op op;
int r, type;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index cc05522f50bf..7582f3e3a330 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -54,7 +54,7 @@
#endif
#ifdef CONFIG_PPC32
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
#include "head_booke.h"
#endif
#endif
@@ -139,6 +139,7 @@ int main(void)
OFFSET(THR11, thread_struct, r11);
OFFSET(THLR, thread_struct, lr);
OFFSET(THCTR, thread_struct, ctr);
+ OFFSET(THSR0, thread_struct, sr0);
#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
@@ -218,10 +219,12 @@ int main(void)
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_64S_HASH_MMU
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
+#endif
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 803c2a45b22a..9d9d56b574cc 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -161,7 +161,7 @@ void btext_map(void)
boot_text_mapped = 1;
}
-static int btext_initialize(struct device_node *np)
+static int __init btext_initialize(struct device_node *np)
{
unsigned int width, height, depth, pitch;
unsigned long address = 0;
@@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout)
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
- if (rc == 0)
+ if (rc == 0) {
+ of_node_put(np);
break;
+ }
}
return rc;
}
@@ -290,7 +292,7 @@ void btext_update_display(unsigned long phys, int width, int height,
}
EXPORT_SYMBOL(btext_update_display);
-void btext_clearscreen(void)
+void __init btext_clearscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -308,7 +310,7 @@ void btext_clearscreen(void)
rmci_maybe_off();
}
-void btext_flushscreen(void)
+void __init btext_flushscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -327,7 +329,7 @@ void btext_flushscreen(void)
__asm__ __volatile__ ("sync" ::: "memory");
}
-void btext_flushline(void)
+void __init btext_flushline(void)
{
unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -542,7 +544,7 @@ void btext_drawstring(const char *c)
btext_drawchar(*c++);
}
-void btext_drawtext(const char *c, unsigned int len)
+void __init btext_drawtext(const char *c, unsigned int len)
{
if (!boot_text_mapped)
return;
@@ -550,7 +552,7 @@ void btext_drawtext(const char *c, unsigned int len)
btext_drawchar(*c++);
}
-void btext_drawhex(unsigned long v)
+void __init btext_drawhex(unsigned long v)
{
if (!boot_text_mapped)
return;
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index cf1be75b7833..00b0992be3e7 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -710,7 +710,7 @@ static struct kobj_attribute cache_shared_cpu_list_attr =
__ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
/* Attributes which should always be created -- the kobject/sysfs core
- * does this automatically via kobj_type->default_attrs. This is the
+ * does this automatically via kobj_type->default_groups. This is the
* minimum data required to uniquely identify a cache.
*/
static struct attribute *cache_index_default_attrs[] = {
@@ -720,6 +720,7 @@ static struct attribute *cache_index_default_attrs[] = {
&cache_shared_cpu_list_attr.attr,
NULL,
};
+ATTRIBUTE_GROUPS(cache_index_default);
/* Attributes which should be created if the cache device node has the
* right properties -- see cacheinfo_create_index_opt_attrs
@@ -738,7 +739,7 @@ static const struct sysfs_ops cache_index_ops = {
static struct kobj_type cache_index_type = {
.release = cache_index_release,
.sysfs_ops = &cache_index_ops,
- .default_attrs = cache_index_default_attrs,
+ .default_groups = cache_index_default_groups,
};
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
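
The cacheinfo change follows the generic kobject conversion from kobj_type->default_attrs to ->default_groups; ATTRIBUTE_GROUPS() generates the groups array from the existing attribute list. The general shape of that conversion, with hypothetical names:

static struct attribute *foo_attrs[] = {
	&bar_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(foo);		/* emits foo_groups[] */

static struct kobj_type foo_ktype = {
	.default_groups = foo_groups,
};
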
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
index 3cca88ee96d7..3dc61e203f37 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
static void init_PMU(void)
{
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
@@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
{
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
- mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
/*
@@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
@@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
@@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
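
The MMCR0 writes above now set MMCR0_FC (freeze counters) during CPU setup rather than leaving the PMU controls fully zeroed, so no events are counted until the perf code explicitly unfreezes the PMU. A hypothetical helper showing the unfreeze side of that contract:

static void pmu_unfreeze(void)
{
	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
}
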
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5545c9cd17c1..f55c6fb34a3a 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
ppc_msgsync();
- may_hard_irq_enable();
+ if (should_hard_irq_enable())
+ do_hard_irq_enable();
kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index ba527fb52993..7d1b2c4a4891 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
mtspr(SPRN_PCR, system_registers.pcr);
}
@@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
}
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_LPES0; /* HV external interrupts */
@@ -271,6 +273,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
{
u64 lpcr;
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_ISL;
@@ -290,6 +295,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
@@ -303,15 +311,15 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
{
-#ifdef CONFIG_PPC_RADIX_MMU
+ if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU))
+ return 0;
+
+ cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO;
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
- cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
-#endif
- return 0;
}
static int __init feat_enable_dscr(struct dt_cpu_feature *f)
@@ -336,7 +344,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f)
return 1;
}
-static void hfscr_pmu_enable(void)
+static void __init hfscr_pmu_enable(void)
{
u64 hfscr = mfspr(SPRN_HFSCR);
hfscr |= PPC_BIT(60);
@@ -351,7 +359,7 @@ static void init_pmu_power8(void)
}
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
mtspr(SPRN_MMCRS, 0);
@@ -390,7 +398,7 @@ static void init_pmu_power9(void)
mtspr(SPRN_MMCRC, 0);
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
@@ -426,7 +434,7 @@ static void init_pmu_power10(void)
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
- mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
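
feat_enable_mmu_radix() above trades an #ifdef block for an IS_ENABLED() early return, which keeps the whole function compiled and type-checked even when radix support is configured out. The shape of that pattern, with a hypothetical feature function:

static int __init feat_enable_example(struct dt_cpu_feature *f)
{
	if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU))
		return 0;

	/* radix-only setup would go here */
	return 1;
}
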
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 9bdaaf7fddc9..2f9dbf8ad2ee 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -280,7 +280,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
}
DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
-void eeh_cache_debugfs_init(void)
+void __init eeh_cache_debugfs_init(void)
{
debugfs_create_file_unsafe("eeh_address_cache", 0400,
arch_debugfs_dir, NULL,
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 350dab18e137..422f80b5b27b 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -905,18 +905,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
#endif /* CONFIG_STACKTRACE */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
- result = PCI_ERS_RESULT_DISCONNECT;
- }
- eeh_for_each_pe(pe, tmp_pe)
- eeh_pe_for_each_dev(tmp_pe, edev, tmp)
- edev->mode &= ~EEH_DEV_NO_HANDLER;
+ goto recover_failed;
+ }
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -928,39 +929,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* the error. Override the result if necessary to have partially
* hotplug for this case.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
- pe->freeze_count, eeh_max_freezes);
- pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_set_channel_state(pe, pci_channel_io_frozen);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(IO frozen)", pe,
- eeh_report_error, &result);
- if ((pe->type & EEH_PE_PHB) &&
- result != PCI_ERS_RESULT_NONE &&
- result != PCI_ERS_RESULT_NEED_RESET)
- result = PCI_ERS_RESULT_NEED_RESET;
- }
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if (result == PCI_ERS_RESULT_DISCONNECT)
+ goto recover_failed;
+
+ /*
+ * Error logged on a PHB are always fences which need a full
+ * PHB reset to clear so force that to happen.
+	 * Errors logged on a PHB are always fences which need a full
+ if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE)
+ result = PCI_ERS_RESULT_NEED_RESET;
/* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
- pr_warn("EEH: Permanent failure\n");
- result = PCI_ERS_RESULT_DISCONNECT;
- }
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warn("EEH: Permanent failure\n");
+ goto recover_failed;
}
/* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers
* have been informed.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_info("EEH: Collect temporary log\n");
- eeh_slot_error_detail(pe, EEH_LOG_TEMP);
- }
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
@@ -970,9 +970,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset with hotplug activity\n");
rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
- pr_warn("%s: Unable to reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
+ pr_warn("%s: Unable to reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
}
@@ -980,10 +979,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -991,15 +990,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_report_mmio_enabled, &result);
}
}
-
- /* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
/*
@@ -1017,16 +1014,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset without hotplug activity\n");
rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
- pr_warn("%s: Cannot reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
- } else {
- result = PCI_ERS_RESULT_NONE;
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("slot_reset", pe, eeh_report_reset,
- &result);
+ pr_warn("%s: Cannot reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
+
+ result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
}
if ((result == PCI_ERS_RESULT_RECOVERED) ||
@@ -1054,45 +1050,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
pr_info("EEH: Recovery successful.\n");
- } else {
- /*
- * About 90% of all real-life EEH failures in the field
- * are due to poorly seated PCI cards. Only 10% or so are
- * due to actual, failed cards.
- */
- pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
- "Please try reseating or replacing it\n",
- pe->phb->global_number, pe->addr);
+ goto out;
+ }
- eeh_slot_error_detail(pe, EEH_LOG_PERM);
+recover_failed:
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
- /* Notify all devices that they're about to go down. */
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
- /* Mark the PE to be removed permanently */
- eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+ /* Notify all devices that they're about to go down. */
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
- /*
- * Shut down the device drivers for good. We mark
- * all removed devices correctly to avoid access
- * the their PCI config any more.
- */
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ /* Mark the PE to be removed permanently */
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- /* The passed PE should no longer be used */
- return;
- }
+ /*
+ * Shut down the device drivers for good. We mark
+	 * all removed devices correctly to avoid accessing
+	 * their PCI config any more.
+ */
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
out:
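
The eeh_handle_normal_event() rework above replaces the repeated "if (result != PCI_ERS_RESULT_DISCONNECT)" guards with a single recover_failed label, so each recovery step can bail out directly. A generic sketch of that control flow, with hypothetical step names:

static int recover_example(void)
{
	if (step_reset() < 0)
		goto recover_failed;
	if (step_reenable_io() < 0)
		goto recover_failed;
	return 0;

recover_failed:
	mark_permanently_failed();
	return -EIO;
}
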
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 61fdd53cdd9a..7748c278d13c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -73,13 +73,39 @@ prepare_transfer_to_handler:
_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+ .globl __kuep_lock
+__kuep_lock:
+ lwz r9, THREAD+THSR0(r2)
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+
+__kuep_unlock:
+ lwz r9, THREAD+THSR0(r2)
+ rlwinm r9,r9,0,~SR_NX
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+
+.macro kuep_lock
+ bl __kuep_lock
+.endm
+.macro kuep_unlock
+ bl __kuep_unlock
+.endm
+#else
+.macro kuep_lock
+.endm
+.macro kuep_unlock
+.endm
+#endif
+
.globl transfer_to_syscall
transfer_to_syscall:
stw r11, GPR1(r1)
stw r11, 0(r1)
mflr r12
stw r12, _LINK(r1)
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#endif
lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -90,10 +116,10 @@ transfer_to_syscall:
stw r12,8(r1)
stw r2,_TRAP(r1)
SAVE_GPR(0, r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
addi r2,r10,-THREAD
SAVE_NVGPRS(r1)
+ kuep_lock
/* Calling convention has r9 = orig r0, r10 = regs */
addi r10,r1,STACK_FRAME_OVERHEAD
@@ -110,6 +136,7 @@ ret_from_syscall:
cmplwi cr0,r5,0
bne- 2f
#endif /* CONFIG_PPC_47x */
+ kuep_unlock
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
@@ -139,7 +166,7 @@ syscall_exit_finish:
mtxer r5
lwz r0,GPR0(r1)
lwz r3,GPR3(r1)
- REST_8GPRS(4,r1)
+ REST_GPRS(4, 11, r1)
lwz r12,GPR12(r1)
b 1b
@@ -232,9 +259,9 @@ fast_exception_return:
beq 3f /* if not, we've got problems */
#endif
-2: REST_4GPRS(3, r11)
+2: REST_GPRS(3, 6, r11)
lwz r10,_CCR(r11)
- REST_2GPRS(1, r11)
+ REST_GPRS(1, 2, r11)
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
@@ -273,6 +300,7 @@ interrupt_return:
beq .Lkernel_interrupt_return
bl interrupt_exit_user_prepare
cmpwi r3,0
+ kuep_unlock
bne- .Lrestore_nvgprs
.Lfast_user_interrupt_return:
@@ -298,16 +326,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
* the reliable stack unwinder later on. Clear it.
*/
stw r0,8(r1)
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtcr r3
mtlr r4
mtctr r5
mtspr SPRN_XER,r6
- REST_4GPRS(2, r1)
- REST_GPR(6, r1)
+ REST_GPRS(2, 6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
rfi
@@ -341,8 +367,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
lwz r6,_CCR(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtlr r3
mtctr r4
@@ -354,7 +379,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
*/
stw r0,8(r1)
- REST_4GPRS(2, r1)
+ REST_GPRS(2, 5, r1)
bne- cr1,1f /* emulate stack store */
mtcr r6
@@ -430,8 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
bne interrupt_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
- REST_4GPRS(3, r1); \
- REST_2GPRS(7, r1); \
+ REST_GPRS(3, 8, r1); \
lwz r10,_XER(r1); \
lwz r11,_CTR(r1); \
mtspr SPRN_XER,r10; \
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 70cff7b49e17..9581906b5ee9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -180,7 +180,7 @@ _GLOBAL(_switch)
#endif
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
b 2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
slbmte r7,r0
isync
2:
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 93b0f3ec8fb0..d4b8aff20815 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
+ ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 711c66b76df1..67dc4e3179a0 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
stdcx. r0,0,r1 /* to clear the reservation */
- REST_4GPRS(2, r1)
- REST_4GPRS(6, r1)
+ REST_GPRS(2, 9, r1)
ld r10,_CTR(r1)
ld r11,_XER(r1)
@@ -375,9 +374,7 @@ ret_from_mc_except:
exc_##n##_common: \
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
@@ -1061,9 +1058,7 @@ bad_stack_book3e:
std r11,_ESR(r1)
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
@@ -1077,8 +1072,7 @@ bad_stack_book3e:
std r10,_LINK(r1)
std r11,_CTR(r1)
std r12,_XER(r1)
- SAVE_10GPRS(14,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPRS(14, 31, r1)
lhz r12,PACA_TRAP_SAVE(r13)
std r12,_TRAP(r1)
addi r11,r1,INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eaf1f72131a1..55caeee37c08 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -48,7 +48,7 @@
.balign IFETCH_ALIGN_BYTES; \
.global name; \
_ASM_NOKPROBE_SYMBOL(name); \
- DEFINE_FIXED_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name, text); \
name:
#define TRAMP_REAL_BEGIN(name) \
@@ -76,31 +76,18 @@ name:
ld reg,PACAKBASE(r13); /* get high part of &label */ \
ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
-#define __LOAD_HANDLER(reg, label) \
+#define __LOAD_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l
+ ori reg,reg,(ABS_ADDR(label, section))@l
/*
* Branches from unrelocated code (e.g., interrupts) to labels outside
* head-y require >64K offsets.
*/
-#define __LOAD_FAR_HANDLER(reg, label) \
+#define __LOAD_FAR_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l; \
- addis reg,reg,(ABS_ADDR(label))@h
-
-/*
- * Branch to label using its 0xC000 address. This results in instruction
- * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
- * on using mtmsr rather than rfid.
- *
- * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
- * load KBASE for a slight optimisation.
- */
-#define BRANCH_TO_C000(reg, label) \
- __LOAD_FAR_HANDLER(reg, label); \
- mtctr reg; \
- bctr
+ ori reg,reg,(ABS_ADDR(label, section))@l; \
+ addis reg,reg,(ABS_ADDR(label, section))@h
/*
* Interrupt code generation macros
@@ -111,9 +98,10 @@ name:
#define IAREA .L_IAREA_\name\() /* PACA save area */
#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */
+#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */
#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
-#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */
#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
@@ -151,15 +139,18 @@ do_define_int n
.ifndef IISIDE
IISIDE=0
.endif
+ .ifndef ICFAR
+ ICFAR=1
+ .endif
+ .ifndef ICFAR_IF_HVMODE
+ ICFAR_IF_HVMODE=0
+ .endif
.ifndef IDAR
IDAR=0
.endif
.ifndef IDSISR
IDSISR=0
.endif
- .ifndef ISET_RI
- ISET_RI=1
- .endif
.ifndef IBRANCH_TO_COMMON
IBRANCH_TO_COMMON=1
.endif
@@ -291,9 +282,21 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
std r10,IAREA+EX_R10(r13) /* save r10 - r12 */
+ .if ICFAR
BEGIN_FTR_SECTION
mfspr r10,SPRN_CFAR
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(69)
+ mfspr r10,SPRN_CFAR
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(69)
+ li r10,0
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .endif
.if \ool
.if !\virt
b tramp_real_\name
@@ -309,9 +312,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
BEGIN_FTR_SECTION
std r9,IAREA+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ .if ICFAR || ICFAR_IF_HVMODE
BEGIN_FTR_SECTION
std r10,IAREA+EX_CFAR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .endif
INTERRUPT_TO_KERNEL
mfctr r10
std r10,IAREA+EX_CTR(r13)
@@ -376,7 +381,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
* This switches to virtual mode and sets MSR[RI].
*/
.macro __GEN_COMMON_ENTRY name
-DEFINE_FIXED_SYMBOL(\name\()_common_real)
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name kvm_interrupt
@@ -399,7 +404,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
.endif
.balign IFETCH_ALIGN_BYTES
-DEFINE_FIXED_SYMBOL(\name\()_common_virt)
+DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)
\name\()_common_virt:
.if IKVM_VIRT
KVMTEST \name kvm_interrupt
@@ -413,7 +418,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
* want to run in real mode.
*/
.macro __GEN_REALMODE_COMMON_ENTRY name
-DEFINE_FIXED_SYMBOL(\name\()_common_real)
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name kvm_interrupt
@@ -512,11 +517,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
stb r10,PACASRR_VALID(r13)
.endif
- .if ISET_RI
- li r10,MSR_RI
- mtmsrd r10,1 /* Set MSR_RI */
- .endif
-
.if ISTACK
.if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
@@ -568,14 +568,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
.endif
BEGIN_FTR_SECTION
+ .if ICFAR || ICFAR_IF_HVMODE
ld r10,IAREA+EX_CFAR(r13)
+ .else
+ li r10,0
+ .endif
std r10,ORIG_GPR3(r1)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe */
- SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
- SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */
mflr r9 /* Get LR, later save to stack */
ld r2,PACATOC(r13) /* get kernel TOC into r2 */
std r9,_LINK(r1)
@@ -693,8 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlr r9
ld r9,_CCR(r1)
mtcr r9
- REST_8GPRS(2, r1)
- REST_4GPRS(10, r1)
+ REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
ld r1,GPR1(r1)
@@ -850,12 +852,12 @@ SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
#ifdef CONFIG_RELOCATABLE
TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
- __LOAD_HANDLER(r10, system_call_vectored_common)
+ __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines)
mtctr r10
bctr
TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
- __LOAD_HANDLER(r10, system_call_vectored_sigill)
+ __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines)
mtctr r10
bctr
#endif
@@ -902,11 +904,6 @@ INT_DEFINE_BEGIN(system_reset)
IVEC=0x100
IAREA=PACA_EXNMI
IVIRT=0 /* no virt entry point */
- /*
- * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
- * being used, so a nested NMI exception would corrupt it.
- */
- ISET_RI=0
ISTACK=0
IKVM_REAL=1
INT_DEFINE_END(system_reset)
@@ -964,7 +961,9 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
/* We are waking up from idle, so may clobber any volatile register */
cmpwi cr1,r5,2
bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
- BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss))
+ __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines)
+ mtctr r12
+ bctr
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -979,16 +978,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi)
EXC_COMMON_BEGIN(system_reset_common)
__GEN_COMMON_ENTRY system_reset
/*
- * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
- * to recover, but nested NMI will notice in_nmi and not recover
- * because of the use of the NMI stack. in_nmi reentrancy is tested in
- * system_reset_exception.
+ * enables MSR_RI, SLB or MCE will be able to recover, but a nested
+ * enable MSR_RI, then SLB or MCE will be able to recover, but a nested
+ * NMI will notice in_nmi and not recover because of the use of the NMI
+ * stack. in_nmi reentrancy is tested in system_reset_exception.
*/
lhz r10,PACA_IN_NMI(r13)
addi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- li r10,MSR_RI
- mtmsrd r10,1
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
@@ -1062,12 +1059,6 @@ INT_DEFINE_BEGIN(machine_check_early)
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
IREALMODE_COMMON=1
- /*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
- */
- ISET_RI=0
ISTACK=0
IDAR=1
IDSISR=1
@@ -1078,7 +1069,6 @@ INT_DEFINE_BEGIN(machine_check)
IVEC=0x200
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
- ISET_RI=0
IDAR=1
IDSISR=1
IKVM_REAL=1
@@ -1148,9 +1138,6 @@ EXC_COMMON_BEGIN(machine_check_early_common)
BEGIN_FTR_SECTION
bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- li r10,MSR_RI
- mtmsrd r10,1
-
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
@@ -1238,10 +1225,6 @@ EXC_COMMON_BEGIN(machine_check_common)
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
GEN_COMMON machine_check
-
- /* Enable MSR_RI when finished with PACA_EXMC */
- li r10,MSR_RI
- mtmsrd r10,1
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception_async
b interrupt_return_srr
@@ -1369,11 +1352,15 @@ EXC_COMMON_BEGIN(data_access_common)
addi r3,r1,STACK_FRAME_OVERHEAD
andis. r0,r4,DSISR_DABRMATCH@h
bne- 1f
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
1: bl do_break
@@ -1416,6 +1403,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
EXC_COMMON_BEGIN(data_access_slb_common)
GEN_COMMON data_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1428,9 +1416,12 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1462,11 +1453,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80)
EXC_COMMON_BEGIN(instruction_access_common)
GEN_COMMON instruction_access
addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
@@ -1496,6 +1491,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
EXC_COMMON_BEGIN(instruction_access_slb_common)
GEN_COMMON instruction_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1508,9 +1504,12 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1536,6 +1535,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
*
* If soft masked, the masked handler will note the pending interrupt for
* replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
*/
INT_DEFINE_BEGIN(hardware_interrupt)
IVEC=0x500
@@ -1543,6 +1548,10 @@ INT_DEFINE_BEGIN(hardware_interrupt)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+ ICFAR=0
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR_IF_HVMODE=1
+#endif
INT_DEFINE_END(hardware_interrupt)
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1764,6 +1773,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
* If PPC_WATCHDOG is configured, the soft masked handler will actually set
* things back up to run soft_nmi_interrupt as a regular interrupt handler
* on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
*/
INT_DEFINE_BEGIN(decrementer)
IVEC=0x900
@@ -1771,6 +1784,7 @@ INT_DEFINE_BEGIN(decrementer)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(decrementer)
EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
@@ -1846,6 +1860,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)
* If soft masked, the masked handler will note the pending interrupt for
* replay, leaving MSR[EE] enabled in the interrupted context because the
* doorbells are edge triggered.
+ *
+ * CFAR is not required, similarly to hardware_interrupt.
*/
INT_DEFINE_BEGIN(doorbell_super)
IVEC=0xa00
@@ -1853,6 +1869,7 @@ INT_DEFINE_BEGIN(doorbell_super)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(doorbell_super)
EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
@@ -1904,6 +1921,7 @@ INT_DEFINE_BEGIN(system_call)
IVEC=0xc00
IKVM_REAL=1
IKVM_VIRT=1
+ ICFAR=0
INT_DEFINE_END(system_call)
.macro SYSTEM_CALL virt
@@ -1942,12 +1960,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
HMT_MEDIUM
.if ! \virt
- __LOAD_HANDLER(r10, system_call_common_real)
+ __LOAD_HANDLER(r10, system_call_common_real, real_vectors)
mtctr r10
bctr
.else
#ifdef CONFIG_RELOCATABLE
- __LOAD_HANDLER(r10, system_call_common)
+ __LOAD_HANDLER(r10, system_call_common, virt_vectors)
mtctr r10
bctr
#else
@@ -2001,7 +2019,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 * Requires __LOAD_FAR_HANDLER because kvmppc_hcall lives
* outside the head section.
*/
- __LOAD_FAR_HANDLER(r10, kvmppc_hcall)
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)
mtctr r10
bctr
#else
@@ -2202,6 +2220,11 @@ EXC_COMMON_BEGIN(hmi_exception_common)
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
* This is an asynchronous interrupt in response to a msgsnd doorbell.
* Similar to the 0xa00 doorbell but for host rather than guest.
+ *
+ * CFAR is not required (similar to doorbell_interrupt), unless KVM HV
+ * is enabled, in which case it may be a guest exit. Most PowerNV kernels
+ * include KVM support so it would be nice if this could be dynamically
+ * patched out if KVM was not currently running any guests.
*/
INT_DEFINE_BEGIN(h_doorbell)
IVEC=0xe80
@@ -2209,6 +2232,9 @@ INT_DEFINE_BEGIN(h_doorbell)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
INT_DEFINE_END(h_doorbell)
EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
@@ -2232,6 +2258,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
* This is an asynchronous interrupt in response to an "external exception".
* Similar to 0x500 but for host only.
+ *
+ * Like h_doorbell, CFAR is only required for KVM HV because this can be
+ * a guest exit.
*/
INT_DEFINE_BEGIN(h_virt_irq)
IVEC=0xea0
@@ -2239,6 +2268,9 @@ INT_DEFINE_BEGIN(h_virt_irq)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
INT_DEFINE_END(h_virt_irq)
EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
@@ -2275,6 +2307,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20)
*
* If soft masked, the masked handler will note the pending interrupt for
* replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not used by perf interrupts so not required.
*/
INT_DEFINE_BEGIN(performance_monitor)
IVEC=0xf00
@@ -2282,6 +2316,7 @@ INT_DEFINE_BEGIN(performance_monitor)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(performance_monitor)
EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
@@ -2706,6 +2741,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
INT_DEFINE_BEGIN(soft_nmi)
IVEC=0x900
ISTACK=0
+ ICFAR=0
INT_DEFINE_END(soft_nmi)
/*
@@ -3025,7 +3061,7 @@ USE_FIXED_SECTION(virt_trampolines)
.align 7
.globl __end_interrupts
__end_interrupts:
-DEFINE_FIXED_SYMBOL(__end_interrupts)
+DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines)
CLOSE_FIXED_SECTION(real_vectors);
CLOSE_FIXED_SECTION(real_trampolines);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b7ceb041743c..d03e488cfe9c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -251,7 +251,7 @@ bool is_fadump_reserved_mem_contiguous(void)
}
/* Print firmware assisted dump configurations for debugging purpose. */
-static void fadump_show_config(void)
+static void __init fadump_show_config(void)
{
int i;
@@ -353,7 +353,7 @@ static __init u64 fadump_calculate_reserve_size(void)
* Calculate the total memory size required to be reserved for
* firmware-assisted dump registration.
*/
-static unsigned long get_fadump_area_size(void)
+static unsigned long __init get_fadump_area_size(void)
{
unsigned long size = 0;
@@ -462,7 +462,7 @@ static int __init fadump_get_boot_mem_regions(void)
* with the given memory range.
* False, otherwise.
*/
-static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
{
bool ret = false;
int i;
@@ -737,7 +737,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fw_dump.ops->fadump_trigger(fdh, str);
}
-u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -752,7 +752,7 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
return buf;
}
-void fadump_update_elfcore_header(char *bufp)
+void __init fadump_update_elfcore_header(char *bufp)
{
struct elf_phdr *phdr;
@@ -770,7 +770,7 @@ void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_alloc_buffer(unsigned long size)
+static void *__init fadump_alloc_buffer(unsigned long size)
{
unsigned long count, i;
struct page *page;
@@ -792,7 +792,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
@@ -1447,7 +1447,7 @@ static ssize_t release_mem_store(struct kobject *kobj,
}
/* Release the reserved memory and disable the FADump */
-static void unregister_fadump(void)
+static void __init unregister_fadump(void)
{
fadump_cleanup();
fadump_release_memory(fw_dump.reserve_dump_area_start,
@@ -1547,7 +1547,7 @@ ATTRIBUTE_GROUPS(fadump);
DEFINE_SHOW_ATTRIBUTE(fadump_region);
-static void fadump_init_files(void)
+static void __init fadump_init_files(void)
{
int rc = 0;
@@ -1641,6 +1641,14 @@ int __init setup_fadump(void)
else if (fw_dump.reserve_dump_area_size)
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+ /*
+ * In case of panic, fadump is triggered via ppc_panic_event()
+ * panic notifier. Setting crash_kexec_post_notifiers to 'true'
+ * lets the panic() function take the crash-friendly path before panic
+ * notifiers are invoked.
+ */
+ crash_kexec_post_notifiers = true;
+
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index ba4afe3b5a9c..f71f2bbd4de6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state)
*/
_GLOBAL(load_up_fpu)
mfmsr r5
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_FP|MSR_RI
+#else
ori r5,r5,MSR_FP
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
oris r5,r5,MSR_VSX@h
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 349c4a820231..c3286260a7d1 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
stw r10,8(r1)
li r10, \trapno
stw r10,_TRAP(r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
SAVE_NVGPRS(r1)
stw r2,GPR2(r1)
stw r12,_NIP(r1)
@@ -136,6 +135,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
andi. r12,r9,MSR_PR
bne 777f
bl prepare_transfer_to_handler
+#ifdef CONFIG_PPC_KUEP
+ b 778f
+777:
+ bl __kuep_lock
+778:
+#endif
777:
#endif
.endm
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 7d72ee5ab387..b6c6d1de5fd5 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -27,6 +27,7 @@
#include <linux/init.h>
#include <linux/pgtable.h>
+#include <linux/sizes.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -297,6 +298,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
@@ -377,6 +382,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
@@ -650,7 +659,7 @@ start_here:
b . /* prevent prefetch past rfi */
/* Set up the initial MMU state so we can do the first level of
- * kernel initialization. This maps the first 16 MBytes of memory 1:1
+ * kernel initialization. This maps the first 32 MBytes of memory 1:1
* virtual to physical and more importantly sets the cache mode.
*/
initial_mmu:
@@ -687,6 +696,12 @@ initial_mmu:
tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+ li r0,62 /* TLB slot 62 */
+ addis r4,r4,SZ_16M@h
+ addis r3,r3,SZ_16M@h
+ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
+ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+
isync
/* Establish the exception vector base
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 02d2928d1e01..b73a56466903 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -334,6 +334,10 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -444,6 +448,10 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -532,10 +540,7 @@ finish_tlb_load_44x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
-#ifdef CONFIG_PPC_KUEP
-0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
- patch_site 0b, patch__tlb_44x_kuep
-#endif
+ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
@@ -575,6 +580,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG3
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Mask of required permission bits. Note that while we
@@ -672,6 +681,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Make up the required permissions */
@@ -747,10 +760,7 @@ finish_tlb_load_47x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
-#ifdef CONFIG_PPC_KUEP
-0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
- patch_site 0b, patch__tlb_47x_kuep
-#endif
+ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
1: tlbwe r11,r13,2
/* Done...restore registers and get out of here.
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f17ae2083733..5c5181e8d5f1 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -126,7 +126,7 @@ __secondary_hold_acknowledge:
. = 0x5c
.globl __run_at_load
__run_at_load:
-DEFINE_FIXED_SYMBOL(__run_at_load)
+DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)
.long RUN_AT_LOAD_DEFAULT
#endif
@@ -156,7 +156,7 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0)
+ std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0)
sync
li r26,0
@@ -164,7 +164,7 @@ __secondary_hold:
tovirt(r26,r26)
#endif
/* All secondary cpus wait here until told to start. */
-100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26)
+100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26)
cmpdi 0,r12,0
beq 100b
@@ -649,15 +649,15 @@ __after_prom_start:
3:
#endif
/* # bytes of memory to copy */
- lis r5,(ABS_ADDR(copy_to_here))@ha
- addi r5,r5,(ABS_ADDR(copy_to_here))@l
+ lis r5,(ABS_ADDR(copy_to_here, text))@ha
+ addi r5,r5,(ABS_ADDR(copy_to_here, text))@l
bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
/* Jump to the copy of this code that we just made */
- addis r8,r3,(ABS_ADDR(4f))@ha
- addi r12,r8,(ABS_ADDR(4f))@l
+ addis r8,r3,(ABS_ADDR(4f, text))@ha
+ addi r12,r8,(ABS_ADDR(4f, text))@l
mtctr r12
bctr
@@ -669,8 +669,8 @@ p_end: .8byte _end - copy_to_here
* Now copy the rest of the kernel up to _end, add
* _end - copy_to_here to the copy limit and run again.
*/
- addis r8,r26,(ABS_ADDR(p_end))@ha
- ld r8,(ABS_ADDR(p_end))@l(r8)
+ addis r8,r26,(ABS_ADDR(p_end, text))@ha
+ ld r8,(ABS_ADDR(p_end, text))@l(r8)
add r5,r5,r8
5: bl copy_and_flush /* copy the rest */
@@ -904,7 +904,7 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .8byte __toc_start + 0x8000 - 0b
+p_toc: .8byte .TOC. - 0b
/*
* This is where the main kernel code starts.
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 68e5c0a7e99d..fa84744d6b24 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -931,7 +931,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
_GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
+#ifdef CONFIG_PPC_KUEP
+ lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */
+#else
li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#endif
li r4, 0
3: mtsrin r3, r4
addi r3, r3, 0x111 /* increment VSID */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index ef8d1b1c234e..bb6d5d0fc4ac 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION
stw r10, 8(r1)
li r10, \trapno
stw r10,_TRAP(r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
SAVE_NVGPRS(r1)
stw r2,GPR2(r1)
stw r12,_NIP(r1)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 0a9a0f301474..ac2b4dcf5fd3 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -462,6 +462,12 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
4:
/* Mask of required permission bits. Note that while we
* do copy ESR:ST to _PAGE_RW position as trying to write
@@ -571,6 +577,12 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
/* Make up the required permissions for user code */
#ifdef CONFIG_PTE_64BIT
li r13,_PAGE_PRESENT | _PAGE_BAP_UX
@@ -777,6 +789,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
slwi r10, r12, 1
or r10, r10, r12
+ rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */
iseleq r12, r12, r10
rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
mtspr SPRN_MAS3, r13
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 91a3be14808b..2669f80b3a49 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info
static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
struct arch_hw_breakpoint **info, int *hit,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
int i;
int stepped;
@@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args)
int hit[HBP_NUM_MAX] = {0};
int nr_hit = 0;
bool ptrace_bp = false;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
index 42b967e3d85c..a74623025f3a 100644
--- a/arch/powerpc/kernel/hw_breakpoint_constraints.c
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
* Return true if the event is valid wrt dawr configuration,
* including extraneous exception. Otherwise return false.
*/
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
unsigned long ea, int type, int size,
struct arch_hw_breakpoint *info)
{
@@ -127,7 +127,7 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
return false;
}
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
int *type, int *size, unsigned long *ea)
{
struct instruction_op op;
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 1f835539fda4..4ad79eb638c6 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -82,7 +82,7 @@ void power4_idle(void)
return;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- asm volatile("DSSALL ; sync" ::: "memory");
+ asm volatile(PPC_DSSALL " ; sync" ::: "memory");
power4_idle_nap();
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 13cad9297d82..3c097356366b 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -129,7 +129,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
mtspr SPRN_HID0,r4
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 563ebfcd16e2..7cd6ce3ec423 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -81,7 +81,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
{
syscall_fn f;
- kuep_lock();
+ kuap_lock();
regs->orig_gpr3 = r3;
@@ -406,7 +406,6 @@ again:
/* Restore user access locks last */
kuap_user_restore(regs);
- kuep_unlock();
return ret;
}
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index ec950b08a8dc..92088f848266 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -30,21 +30,23 @@ COMPAT_SYS_CALL_TABLE:
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
+ clrrdi r12,r12,2
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_SRR1
ld r12,_MSR(r1)
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
+ clrrdi r12,r12,2
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_HSRR1
ld r12,_MSR(r1)
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.endif
#endif
.endm
@@ -162,10 +164,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
RFSCV_TO_USER
b . /* prevent speculative execution */
@@ -183,9 +184,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtctr r3
mtlr r4
mtspr SPRN_XER,r5
- REST_10GPRS(2, r1)
- REST_2GPRS(12, r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 13, r1)
+ REST_GPR(1, r1)
RFI_TO_USER
.Lsyscall_vectored_\name\()_rst_end:
@@ -374,10 +374,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
RFI_TO_USER
b . /* prevent speculative execution */
@@ -388,8 +387,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtctr r3
mtspr SPRN_XER,r4
ld r0,GPR0(r1)
- REST_8GPRS(4, r1)
- ld r12,GPR12(r1)
+ REST_GPRS(4, 12, r1)
b .Lsyscall_restore_regs_cont
.Lsyscall_rst_end:
@@ -518,17 +516,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r6,_XER(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
- REST_GPR(13, r1)
+ REST_GPRS(7, 13, r1)
mtcr r3
mtlr r4
mtctr r5
mtspr SPRN_XER,r6
- REST_4GPRS(2, r1)
- REST_GPR(6, r1)
+ REST_GPRS(2, 6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
.ifc \srr,srr
@@ -625,8 +620,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r6,_CCR(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtlr r3
mtctr r4
@@ -638,7 +632,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
*/
std r0,STACK_FRAME_OVERHEAD-16(r1)
- REST_4GPRS(2, r1)
+ REST_GPRS(2, 5, r1)
bne- cr1,1f /* emulate stack store */
mtcr r6
@@ -703,7 +697,7 @@ interrupt_return_macro hsrr
.globl __end_soft_masked
__end_soft_masked:
-DEFINE_FIXED_SYMBOL(__end_soft_masked)
+DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3S
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c4f1d6b7d992..2cf31a97126c 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs)
irq = ppc_md.get_irq();
/* We can hard enable interrupts now to allow perf interrupts */
- may_hard_irq_enable();
+ if (should_hard_irq_enable())
+ do_hard_irq_enable();
/* And finally process it */
if (unlikely(!irq))
@@ -811,7 +812,7 @@ void __init init_IRQ(void)
ppc_md.init_IRQ();
}
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void *critirq_ctx[NR_CPUS] __read_mostly;
void *dbgirq_ctx[NR_CPUS] __read_mostly;
void *mcheckirq_ctx[NR_CPUS] __read_mostly;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index bdee7262c080..9f8d0fa7b718 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -48,7 +48,7 @@ static struct hard_trap_info
{ 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
{ 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#ifdef CONFIG_BOOKE_OR_40x
{ 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
#if defined(CONFIG_FSL_BOOKE)
{ 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
@@ -67,7 +67,7 @@ static struct hard_trap_info
{ 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
#endif
-#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+#else /* !CONFIG_BOOKE_OR_40x */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 86d77ff056a6..9a492fdec1df 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *prev;
- struct ppc_inst insn = ppc_inst_read(p->addr);
+ ppc_inst_t insn = ppc_inst_read(p->addr);
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
- struct ppc_inst insn = ppc_inst_read(p->ainsn.insn);
+ ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);
/* regs->nip is also adjusted if emulate_step returns 1 */
ret = emulate_step(regs, insn);
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 225511d73bef..f2e03ed423d0 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
/* Stop DST streams */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
isync
/* Stop DST streams */
- DSSALL
+ PPC_DSSALL
sync
/* Get the current enable bit of the L3CR into r4 */
@@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
_GLOBAL(__flush_disable_L1)
/* Stop pending altivec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd829f7f25a4..2503dd4713b9 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Display faulty slb contents for SLB errors. */
if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
slb_dump_contents(local_paca->mce_faulty_slbs);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c2f55fe7092d..71e8f2a92e36 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -77,15 +77,15 @@ static bool mce_in_guest(void)
}
/* flush SLBs and reload */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
void flush_and_reload_slb(void)
{
- /* Invalidate all SLBs */
- slb_flush_all_realmode();
-
if (early_radix_enabled())
return;
+ /* Invalidate all SLBs */
+ slb_flush_all_realmode();
+
/*
* This probably shouldn't happen, but it may be possible it's
* called in early boot before SLB shadows are allocated.
@@ -99,7 +99,7 @@ void flush_and_reload_slb(void)
void flush_erat(void)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
flush_and_reload_slb();
return;
@@ -114,7 +114,7 @@ void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
* in real-mode is tricky and can lead to recursive
* faults
*/
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned long pfn, instr_addr;
struct instruction_op op;
struct pt_regs tmp = *regs;
@@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ed04a3ba66fe..40a583e9d3c7 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr,
}
static __always_inline void *
-__module_alloc(unsigned long size, unsigned long start, unsigned long end)
+__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
{
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+ gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);
/*
* Don't do huge page allocations for modules yet until more testing
* is done. STRICT_MODULE_RWX may require extra work to support this
* too.
*/
- return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot,
+ return __vmalloc_node_range(size, 1, start, end, gfp, prot,
VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
NUMA_NO_NODE, __builtin_return_address(0));
}
@@ -114,13 +115,13 @@ void *module_alloc(unsigned long size)
/* First try within 32M limit from _etext to avoid branch trampolines */
if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
- ptr = __module_alloc(size, limit, MODULES_END);
+ ptr = __module_alloc(size, limit, MODULES_END, true);
if (!ptr)
- ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
+ ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);
return ptr;
#else
- return __module_alloc(size, VMALLOC_START, VMALLOC_END);
+ return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
#endif
}
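
The nowarn flag threaded through __module_alloc() above means only the final allocation attempt may emit an allocation-failure warning: the first attempt is confined to the 32M window from _etext and is presumably expected to fail once that window fills, so a warning there would be noise. A minimal sketch of the same try-quietly-then-fall-back shape, with made-up names for illustration only:

	/* Illustrative only; region_alloc() and the window bounds are hypothetical. */
	static void *alloc_with_preferred_window(unsigned long size)
	{
		void *p;

		p = region_alloc(size, WINDOW_START, WINDOW_END,
				 GFP_KERNEL | __GFP_NOWARN);
		if (!p)		/* only this attempt is allowed to warn */
			p = region_alloc(size, FULL_START, FULL_END, GFP_KERNEL);
		return p;
	}
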
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index f417afc08d33..a491ad481d85 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -273,6 +273,31 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
}
#ifdef CONFIG_DYNAMIC_FTRACE
+int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
+{
+ unsigned int jmp[4];
+
+ /* Find where the trampoline jumps to */
+ if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp)))
+ return -EFAULT;
+
+ /* verify that this is what we expect it to be */
+ if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) ||
+ (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) ||
+ jmp[2] != PPC_RAW_MTCTR(_R12) ||
+ jmp[3] != PPC_RAW_BCTR())
+ return -EINVAL;
+
+ addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16);
+ if (addr & 0x8000)
+ addr -= 0x10000;
+
+ *target = addr;
+
+ return 0;
+}
+
int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
{
module->arch.tramp = do_plt_call(module->core_layout.base,
@@ -281,6 +306,14 @@ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
if (!module->arch.tramp)
return -ENOENT;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ module->arch.tramp_regs = do_plt_call(module->core_layout.base,
+ (unsigned long)ftrace_regs_caller,
+ sechdrs, module);
+ if (!module->arch.tramp_regs)
+ return -ENOENT;
+#endif
+
return 0;
}
#endif
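
The address recovery in module_trampoline_target() above follows how a lis/addi pair builds a 32-bit constant: lis supplies the high halfword and addi adds a sign-extended low halfword, so when bit 15 of the low halfword is set the naive (hi << 16) | lo combination is 0x10000 too large. A small worked example of that arithmetic (illustrative only, not part of the patch):

	/* Trampoline encodes:  lis r12,0x1234 ; addi r12,r12,-0x8000 */
	unsigned int hi = 0x1234, lo = 0x8000;		/* immediates as encoded */
	unsigned long addr = (lo & 0xffff) | ((hi & 0xffff) << 16);	/* 0x12348000 */

	if (addr & 0x8000)	/* addi sign-extends, so correct for it */
		addr -= 0x10000;	/* 0x12338000: the real jump target */
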
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 3c8d9bbb51cf..0d9f9cd41e13 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = {
.write = nvram_pstore_write,
};
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
int rc = 0;
@@ -562,7 +562,7 @@ static int nvram_pstore_init(void)
return rc;
}
#else
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
return -1;
}
@@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p)
* Per the criteria passed via nvram_remove_partition(), should this
* partition be removed? 1=remove, 0=keep
*/
-static int nvram_can_remove_partition(struct nvram_partition *part,
+static int __init nvram_can_remove_partition(struct nvram_partition *part,
const char *name, int sig, const char *exceptions[])
{
if (part->header.signature != sig)
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index ce1903064031..3b1c2236cbee 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
- struct ppc_inst branch_op_callback, branch_emulate_step, temp;
+ ppc_inst_t branch_op_callback, branch_emulate_step, temp;
unsigned long op_callback_addr, emulate_step_addr;
kprobe_opcode_t *buff;
long b_offset;
@@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 3. load instruction to be emulated into relevant register, and
*/
- if (IS_ENABLED(CONFIG_PPC64)) {
- temp = ppc_inst_read(p->ainsn.insn);
- patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
- } else {
- patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX);
- }
+ temp = ppc_inst_read(p->ainsn.insn);
+ patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
/*
* 4. branch back from trampoline
@@ -269,7 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
void arch_optimize_kprobes(struct list_head *oplist)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 19ea3312403c..5c7f0b4b784b 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -10,8 +10,8 @@
#include <asm/asm-offsets.h>
#ifdef CONFIG_PPC64
-#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base)
-#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base)
+#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base)
+#define REST_30GPRS(base) REST_GPRS(2, 31, base)
#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop
#else
#define SAVE_30GPRS(base) stmw r2, GPR2(base)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 4208b4044d12..39da688a9455 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
}
#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_PPC_BOOK3S_64
-
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invalid until we actually write them.
@@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
return s;
}
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
#ifdef CONFIG_PPC_PSERIES
/**
@@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
new_paca->slb_shadow_ptr = NULL;
#endif
@@ -307,7 +305,7 @@ void __init allocate_paca(int cpu)
#ifdef CONFIG_PPC_PSERIES
paca->lppaca_ptr = new_lppaca(cpu, limit);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -328,7 +326,7 @@ void __init free_unused_pacas(void)
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = new_ptrs_size;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (early_radix_enabled()) {
/* Ugly fixup, see new_slb_shadow() */
memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
@@ -341,9 +339,9 @@ void __init free_unused_pacas(void)
paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
void copy_mm_to_paca(struct mm_struct *mm)
{
-#ifdef CONFIG_PPC_BOOK3S
mm_context_t *context = &mm->context;
#ifdef CONFIG_PPC_MM_SLICES
@@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
#endif
-#else /* !CONFIG_PPC_BOOK3S */
- return;
-#endif
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 6749905932f4..8bc9cf62cd93 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL(isa_mem_base);
static const struct dma_map_ops *pci_dma_ops;
-void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index b49e1060a3bf..48537964fba1 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -37,7 +37,7 @@ int pcibios_assign_bus_offset = 1;
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(pci_dram_offset);
-void pcibios_make_OF_bus_map(void);
+void __init pcibios_make_OF_bus_map(void);
static void fixup_cpc710_pci64(struct pci_dev* dev);
static u8* pci_to_OF_bus_map;
@@ -109,7 +109,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
}
}
-void
+void __init
pcibios_make_OF_bus_map(void)
{
int i;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 406d7ee9e322..984813a4d5dc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs)
{
struct arch_hw_breakpoint null_brk = {0};
struct arch_hw_breakpoint *info;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
@@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t)
#endif
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvmppc_save_user_regs(void)
+{
+ unsigned long usermsr;
+
+ if (!current->thread.regs)
+ return;
+
+ usermsr = current->thread.regs->msr;
+
+ if (usermsr & MSR_FP)
+ save_fpu(current);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(current);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (usermsr & MSR_TM) {
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
+
+void kvmppc_save_current_sprs(void)
+{
+ save_sprs(&current->thread);
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
static inline void restore_sprs(struct thread_struct *old_thread,
struct thread_struct *new_thread)
{
@@ -1206,7 +1240,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
{
struct thread_struct *new_thread, *old_thread;
struct task_struct *last;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct ppc64_tlb_batch *batch;
#endif
@@ -1215,7 +1249,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
WARN_ON(!irqs_disabled());
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1281,9 +1315,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
set_return_regs_changed(); /* _switch changes stack (and regs) */
-#ifdef CONFIG_PPC32
- kuap_assert_locked();
-#endif
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ kuap_assert_locked();
+
last = _switch(old_thread, new_thread);
/*
@@ -1294,6 +1328,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
*/
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* This applies to a process that was context switched while inside
* arch_enter_lazy_mmu_mode(), to re-activate the batch that was
@@ -1305,6 +1340,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+#endif
/*
* Math facilities are masked out of the child MSR in copy_thread.
@@ -1655,7 +1691,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -1767,6 +1803,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
p->thread.kuap = KUAP_NONE;
#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ p->thread.pid = MMU_NO_CONTEXT;
+#endif
setup_ksp_vsid(p, sp);
@@ -2299,10 +2338,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
* the heap, we can put it above 1TB so it is backed by a 1TB
* segment. Otherwise the heap will be in the bottom 1TB
* which always uses 256MB segments and this may result in a
- * performance penalty. We don't need to worry about radix. For
- * radix, mmu_highuser_ssize remains unchanged from 256MB.
+ * performance penalty.
*/
- if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
+ if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
#endif
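
kvmppc_save_user_regs() and kvmppc_save_current_sprs() above are exported for KVM HV; the code suggests the hypervisor entry path uses them to flush live user FP/VEC/TM state and host SPRs into the thread struct before the CPU runs a guest. A hedged sketch of such a call site (the surrounding path is an assumption, not taken from this hunk):

	/* Hypothetical KVM HV guest-entry snippet, for illustration only. */
	kvmppc_save_user_regs();	/* FP/VEC state, plus TM SPRs if active */
	kvmppc_save_current_sprs();	/* remaining host SPRs via save_sprs() */
	/* ... switch the hardware thread into guest context ... */
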
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ad1230c4f3fe..3d30d40a0e9c 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -231,7 +231,7 @@ static void __init check_cpu_pa_features(unsigned long node)
ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
@@ -447,7 +447,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
*/
#ifdef CONFIG_SPARSEMEM
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
u64 max_mem = 1UL << (MAX_PHYSMEM_BITS);
@@ -458,7 +458,7 @@ static bool validate_mem_limit(u64 base, u64 *size)
return true;
}
#else
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
return true;
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 18b04b08b983..0ac5faacc909 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -672,7 +672,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname)
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static void add_string(char **str, const char *q)
+static void __init add_string(char **str, const char *q)
{
char *p = *str;
@@ -682,7 +682,7 @@ static void add_string(char **str, const char *q)
*str = p;
}
-static char *tohex(unsigned int x)
+static char *__init tohex(unsigned int x)
{
static const char digits[] __initconst = "0123456789abcdef";
static char result[9] __prombss;
@@ -728,7 +728,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
#define prom_islower(c) ('a' <= (c) && (c) <= 'z')
#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c))
-static unsigned long prom_strtoul(const char *cp, const char **endp)
+static unsigned long __init prom_strtoul(const char *cp, const char **endp)
{
unsigned long result = 0, base = 10, value;
@@ -753,7 +753,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)
return result;
}
-static unsigned long prom_memparse(const char *ptr, const char **retptr)
+static unsigned long __init prom_memparse(const char *ptr, const char **retptr)
{
unsigned long ret = prom_strtoul(ptr, retptr);
int shift = 0;
@@ -1786,7 +1786,7 @@ static void __init prom_close_stdin(void)
}
#ifdef CONFIG_PPC_SVM
-static int prom_rtas_hcall(uint64_t args)
+static int __init prom_rtas_hcall(uint64_t args)
{
register uint64_t arg1 asm("r3") = H_RTAS;
register uint64_t arg2 asm("r4") = args;
@@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
/* Check if the phy-handle property exists - bail if it does */
rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
- if (!rv)
+ if (rv <= 0)
return;
/*
@@ -3248,7 +3248,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
/*
* Perform the Enter Secure Mode ultracall.
*/
-static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt)
{
register unsigned long r3 asm("r3") = UV_ESM;
register unsigned long r4 asm("r4") = kbase;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ff80bbad22a5..733e6ef36758 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -492,8 +492,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
}
EXPORT_SYMBOL(rtas_call);
-/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status
- * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
+/**
+ * rtas_busy_delay_time() - From an RTAS status value, calculate the
+ * suggested delay time in milliseconds.
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 100000 - If @status is 9905.
+ * * 10000 - If @status is 9904.
+ * * 1000 - If @status is 9903.
+ * * 100 - If @status is 9902.
+ * * 10 - If @status is 9901.
+ * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but
+ * some callers depend on this behavior, and the worst outcome
+ * is that they will delay for longer than necessary.
+ * * 0 - If @status is not a busy or extended delay value.
*/
unsigned int rtas_busy_delay_time(int status)
{
@@ -513,17 +530,77 @@ unsigned int rtas_busy_delay_time(int status)
}
EXPORT_SYMBOL(rtas_busy_delay_time);
-/* For an RTAS busy status code, perform the hinted delay. */
-unsigned int rtas_busy_delay(int status)
+/**
+ * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Process context. May sleep or schedule.
+ *
+ * Return:
+ * * true - @status is RTAS_BUSY or an extended delay hint. The
+ * caller may assume that the CPU has been yielded if necessary,
+ * and that an appropriate delay for @status has elapsed.
+ * Generally the caller should reattempt the RTAS call which
+ * yielded @status.
+ *
+ * * false - @status is not @RTAS_BUSY nor an extended delay hint. The
+ * caller is responsible for handling @status.
+ */
+bool rtas_busy_delay(int status)
{
unsigned int ms;
+ bool ret;
- might_sleep();
- ms = rtas_busy_delay_time(status);
- if (ms && need_resched())
- msleep(ms);
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ ret = true;
+ ms = rtas_busy_delay_time(status);
+ /*
+ * The extended delay hint can be as high as 100 seconds.
+ * Surely any function returning such a status is either
+ * buggy or isn't going to be significantly slowed by us
+ * polling at 1HZ. Clamp the sleep time to one second.
+ */
+ ms = clamp(ms, 1U, 1000U);
+ /*
+ * The delay hint is an order-of-magnitude suggestion, not
+ * a minimum. It is fine, possibly even advantageous, for
+ * us to pause for less time than hinted. For small values,
+ * use usleep_range() to ensure we don't sleep much longer
+ * than actually needed.
+ *
+ * See Documentation/timers/timers-howto.rst for
+ * explanation of the threshold used here. In effect we use
+ * usleep_range() for 9900 and 9901, msleep() for
+ * 9902-9905.
+ */
+ if (ms <= 20)
+ usleep_range(ms * 100, ms * 1000);
+ else
+ msleep(ms);
+ break;
+ case RTAS_BUSY:
+ ret = true;
+ /*
+ * We should call again immediately if there's no other
+ * work to do.
+ */
+ cond_resched();
+ break;
+ default:
+ ret = false;
+ /*
+ * Not a busy or extended delay status; the caller should
+ * handle @status itself. Ensure we warn on misuses in
+ * atomic context regardless.
+ */
+ might_sleep();
+ break;
+ }
- return ms;
+ return ret;
}
EXPORT_SYMBOL(rtas_busy_delay);
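
With rtas_busy_delay() now returning a bool and handling the sleep itself, the "reattempt the RTAS call" guidance from the kerneldoc maps directly onto the usual retry idiom. A minimal caller sketch (token and arguments are placeholders):

	int rc;

	do {
		rc = rtas_call(token, 0, 1, NULL);	/* placeholder call */
	} while (rtas_busy_delay(rc));	/* true: delay done, try again */

	if (rc)
		pr_err("RTAS call failed: %d\n", rc);
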
@@ -809,13 +886,13 @@ void rtas_os_term(char *str)
/**
* rtas_activate_firmware() - Activate a new version of firmware.
*
+ * Context: This function may sleep.
+ *
* Activate a new version of partition firmware. The OS must call this
* after resuming from a partition hibernation or migration in order
* to maintain the ability to perform live firmware updates. It's not
* catastrophic for this method to be absent or to fail; just log the
* condition in that case.
- *
- * Context: This function may sleep.
*/
void rtas_activate_firmware(void)
{
@@ -890,11 +967,12 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
#endif /* CONFIG_PPC_PSERIES */
/**
- * Find a specific pseries error log in an RTAS extended event log.
+ * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
+ * extended event log.
* @log: RTAS error/event log
* @section_id: two character section identifier
*
- * Returns a pointer to the specified errorlog or NULL if not found.
+ * Return: A pointer to the specified errorlog or NULL if not found.
*/
struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
uint16_t section_id)
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 32ee17753eb4..cf0f42909ddf 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -455,7 +455,7 @@ static void rtas_event_scan(struct work_struct *w)
}
#ifdef CONFIG_PPC64
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
unsigned int err_type ;
int rc ;
@@ -473,12 +473,12 @@ static void retrieve_nvram_error_log(void)
}
}
#else /* CONFIG_PPC64 */
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
}
#endif /* CONFIG_PPC64 */
-static void start_event_scan(void)
+static void __init start_event_scan(void)
{
printk(KERN_DEBUG "RTAS daemon started\n");
pr_debug("rtasd: will sleep for %d milliseconds\n",
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 15fb5ea1b9ea..e159d4093d98 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -44,7 +44,7 @@ static void enable_barrier_nospec(bool enable)
do_barrier_nospec_fixups(enable);
}
-void setup_barrier_nospec(void)
+void __init setup_barrier_nospec(void)
{
bool enable;
@@ -132,7 +132,7 @@ early_param("nospectre_v2", handle_nospectre_v2);
#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
-void setup_spectre_v2(void)
+void __init setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
do_btb_flush_fixups();
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4f1322b65760..f8da937df918 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -582,7 +582,7 @@ static __init int add_pcspkr(void)
device_initcall(add_pcspkr);
#endif /* CONFIG_PCSPKR_PLATFORM */
-void probe_machine(void)
+static __init void probe_machine(void)
{
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 84058bbc8fe9..93f22da12abe 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -29,7 +29,7 @@ void setup_tlb_core_data(void);
static inline void setup_tlb_core_data(void) { }
#endif
-#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void exc_lvl_early_init(void);
#else
static inline void exc_lvl_early_init(void) { }
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 7ec5c47fce0e..a6e9d36d7c01 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);
notrace void __init machine_init(u64 dt_ptr)
{
u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
- struct ppc_inst insn;
+ ppc_inst_t insn;
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
@@ -175,7 +175,7 @@ void __init emergency_stack_init(void)
}
#endif
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void __init exc_lvl_early_init(void)
{
unsigned int i, hw_cpu;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6052f5d5ded3..d87f7c1103ce 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -499,7 +499,7 @@ void smp_release_cpus(void)
* routines and/or provided to userland
*/
-static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
+static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
u32 bsize, u32 sets)
{
info->size = size;
@@ -880,14 +880,23 @@ void __init setup_per_cpu_areas(void)
int rc = -EINVAL;
/*
- * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
- * to group units. For larger mappings, use 1M atom which
- * should be large enough to contain a number of units.
+ * BookE and BookS radix are historical values and should be revisited.
*/
- if (mmu_linear_psize == MMU_PAGE_4K)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E)) {
+ atom_size = SZ_1M;
+ } else if (radix_enabled()) {
atom_size = PAGE_SIZE;
- else
- atom_size = 1 << 20;
+ } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
+ /*
+ * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
+ * to group units. For larger mappings, use 1M atom which
+ * should be large enough to contain a number of units.
+ */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ atom_size = PAGE_SIZE;
+ else
+ atom_size = SZ_1M;
+ }
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 3e053e2fd6b6..d84c434b2b78 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -527,16 +527,20 @@ static long restore_user_regs(struct pt_regs *regs,
regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
#ifdef CONFIG_SPE
- /* force the process to reload the spe registers from
- current->thread when it next does spe instructions */
+ /*
+ * Force the process to reload the spe registers from
+ * current->thread when it next does spe instructions.
+ * Since this is user ABI, we must enforce the sizing.
+ */
+ BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
if (msr & MSR_SPE) {
/* restore spe registers from the stack */
- unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32), failed);
+ unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+ sizeof(current->thread.spe), failed);
current->thread.used_spe = true;
} else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+ memset(&current->thread.spe, 0, sizeof(current->thread.spe));
/* Always get SPEFSCR back */
unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c23ee842c4c3..b7fd6a72aa76 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -61,6 +61,7 @@
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>
#include <asm/kup.h>
+#include <asm/fadump.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -621,6 +622,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
#endif
#ifdef CONFIG_NMI_IPI
+static void crash_stop_this_cpu(struct pt_regs *regs)
+#else
+static void crash_stop_this_cpu(void *dummy)
+#endif
+{
+ /*
+ * Just busy wait here and avoid marking CPU as offline to ensure
+ * register data is captured appropriately.
+ */
+ while (1)
+ cpu_relax();
+}
+
+void crash_smp_send_stop(void)
+{
+ static bool stopped = false;
+
+ /*
+ * In case of fadump, register data for all CPUs is captured by f/w
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
+ * this rtas call to avoid tricky post processing of those CPUs'
+ * backtraces.
+ */
+ if (should_fadump_crash())
+ return;
+
+ if (stopped)
+ return;
+
+ stopped = true;
+
+#ifdef CONFIG_NMI_IPI
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000);
+#else
+ smp_call_function(crash_stop_this_cpu, NULL, 0);
+#endif /* CONFIG_NMI_IPI */
+}
+
+#ifdef CONFIG_NMI_IPI
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
@@ -896,7 +936,8 @@ out:
return tg;
}
-static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
+static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
+ int cpu, int cpu_group_start)
{
int first_thread = cpu_first_thread_sibling(cpu);
int i;
@@ -1635,12 +1676,14 @@ void start_secondary(void *unused)
BUG();
}
+#ifdef CONFIG_PROFILING
int setup_profiling_timer(unsigned int multiplier)
{
return 0;
}
+#endif
-static void fixup_topology(void)
+static void __init fixup_topology(void)
{
int i;
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index f73f4d72fea4..e0cbd63007f2 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume)
#ifdef CONFIG_ALTIVEC
/* Stop pending altivec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
sync
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 96bb20715aa9..9f1903c7f540 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -141,7 +141,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
_GLOBAL(swsusp_arch_resume)
/* Stop pending altivec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 08d8072d6e7a..d45a415d5374 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -214,7 +214,7 @@ static ssize_t __used store_dscr_default(struct device *dev,
static DEVICE_ATTR(dscr_default, 0600,
show_dscr_default, store_dscr_default);
-static void sysfs_create_dscr_default(void)
+static void __init sysfs_create_dscr_default(void)
{
if (cpu_has_feature(CPU_FTR_DSCR)) {
int cpu;
@@ -744,12 +744,12 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR(svm, 0444, show_svm, NULL);
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
}
#else
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
}
#endif /* CONFIG_PPC_SVM */
@@ -1110,7 +1110,7 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
/* NUMA stuff */
#ifdef CONFIG_NUMA
-static void register_nodes(void)
+static void __init register_nodes(void)
{
int i;
@@ -1134,7 +1134,7 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
#else
-static void register_nodes(void)
+static void __init register_nodes(void)
{
return;
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index cae8f03a44fe..62361cc7281c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = {
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev);
@@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {
EXPORT_SYMBOL(decrementer_clockevent);
DEFINE_PER_CPU(u64, decrementers_next_tb);
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#define XSEC_PER_SEC (1024*1024)
@@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
+ return x;
+}
+
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
@@ -539,13 +551,44 @@ void arch_irq_work_raise(void)
preempt_enable();
}
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+ /* We may have raced with new irq work */
+ if (unlikely(test_irq_work_pending()))
+ set_dec(1);
+}
+
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+}
#endif /* CONFIG_IRQ_WORK */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+ u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+ WARN_ON_ONCE(!arch_irqs_disabled());
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (now >= *next_tb) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ } else {
+ now = *next_tb - now;
+ if (now <= decrementer_max)
+ set_dec_or_work(now);
+ }
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
@@ -566,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
return;
}
- /* Ensure a positive value is written to the decrementer, or else
- * some CPUs will continue to take decrementer exceptions. When the
- * PPC_WATCHDOG (decrementer based) is configured, keep this at most
- * 31 bits, which is about 4 seconds on most systems, which gives
- * the watchdog a chance of catching timer interrupt hard lockups.
- */
- if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
- set_dec(0x7fffffff);
- else
- set_dec(decrementer_max);
-
- /* Conditionally hard-enable interrupts now that the DEC has been
- * bumped to its maximum value
- */
- may_hard_irq_enable();
+ /* Conditionally hard-enable interrupts. */
+ if (should_hard_irq_enable()) {
+ /*
+ * Ensure a positive value is written to the decrementer, or
+ * else some CPUs will continue to take decrementer exceptions.
+ * When the PPC_WATCHDOG (decrementer based) is configured,
+ * keep this at most 31 bits, which is about 4 seconds on most
+ * systems, which gives the watchdog a chance of catching timer
+ * interrupt hard lockups.
+ */
+ if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+ set_dec(0x7fffffff);
+ else
+ set_dec(decrementer_max);
+ do_hard_irq_enable();
+ }
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
@@ -606,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
} else {
now = *next_tb - now;
if (now <= decrementer_max)
- set_dec(now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
@@ -730,7 +771,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
static void start_cpu_decrementer(void)
{
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
unsigned int tcr;
/* Clear any pending timer interrupts */
@@ -843,11 +884,7 @@ static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
__this_cpu_write(decrementers_next_tb, get_tb() + evt);
- set_dec(evt);
-
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ set_dec_or_work(evt);
return 0;
}
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2b91f233b05d..3beecc32940b 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim)
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
- SAVE_4GPRS(3, r7) /* user r3-r6 */
- SAVE_GPR(8, r7) /* user r8 */
- SAVE_GPR(9, r7) /* user r9 */
- SAVE_GPR(10, r7) /* user r10 */
+ SAVE_GPRS(2, 6, r7) /* user r2-r6 */
+ SAVE_GPRS(8, 10, r7) /* user r8-r10 */
ld r3, GPR1(r1) /* user r1 */
ld r4, GPR7(r1) /* user r7 */
ld r5, GPR11(r1) /* user r11 */
@@ -445,12 +442,8 @@ restore_gprs:
ld r6, THREAD_TM_PPR(r3)
REST_GPR(0, r7) /* GPR0 */
- REST_2GPRS(2, r7) /* GPR2-3 */
- REST_GPR(4, r7) /* GPR4 */
- REST_4GPRS(8, r7) /* GPR8-11 */
- REST_2GPRS(12, r7) /* GPR12-13 */
-
- REST_NVGPRS(r7) /* GPR14-31 */
+ REST_GPRS(2, 4, r7) /* GPR2-4 */
+ REST_GPRS(8, 31, r7) /* GPR8-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index d89c5df4f206..80b6285769f2 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -41,10 +41,10 @@
#define NUM_FTRACE_TRAMPS 8
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
-static struct ppc_inst
+static ppc_inst_t
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
@@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
}
static int
-ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
{
- struct ppc_inst replaced;
+ ppc_inst_t replaced;
/*
* Note:
@@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
*/
static int test_24bit_addr(unsigned long ip, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
/* use the create_branch to verify that this offset can be branched */
return create_branch(&op, (u32 *)ip, addr, 0) == 0;
}
-static int is_bl_op(struct ppc_inst op)
+static int is_bl_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000001;
}
-static int is_b_op(struct ppc_inst op)
+static int is_b_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000000;
}
-static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op)
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
{
int offset;
@@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod,
{
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
- struct ppc_inst op, pop;
+ ppc_inst_t op, pop;
/* read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -221,10 +221,9 @@ static int
__ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
- unsigned int jmp[4];
+ ppc_inst_t op;
unsigned long ip = rec->ip;
- unsigned long tramp;
+ unsigned long tramp, ptr;
if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
@@ -238,41 +237,13 @@ __ftrace_make_nop(struct module *mod,
/* let's find where the pointer goes */
tramp = find_bl_target(ip, op);
- /*
- * On PPC32 the trampoline looks like:
- * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
- * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
- * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
- * 0x4e, 0x80, 0x04, 0x20 bctr
- */
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
/* Find where the trampoline jumps to */
- if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) {
- pr_err("Failed to read %lx\n", tramp);
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
return -EFAULT;
}
- pr_devel(" %08x %08x ", jmp[0], jmp[1]);
-
- /* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
- ((jmp[1] & 0xffff0000) != 0x398c0000) ||
- (jmp[2] != 0x7d8903a6) ||
- (jmp[3] != 0x4e800420)) {
- pr_err("Not a trampoline\n");
- return -EINVAL;
- }
-
- tramp = (jmp[1] & 0xffff) |
- ((jmp[0] & 0xffff) << 16);
- if (tramp & 0x8000)
- tramp -= 0x10000;
-
- pr_devel(" %lx ", tramp);
-
- if (tramp != addr) {
+ if (ptr != addr) {
pr_err("Trampoline location %08lx does not match addr\n",
tramp);
return -EINVAL;
@@ -291,7 +262,7 @@ __ftrace_make_nop(struct module *mod,
static unsigned long find_ftrace_tramp(unsigned long ip)
{
int i;
- struct ppc_inst instr;
+ ppc_inst_t instr;
/*
* We have the compiler generated long_branch tramps at the end
@@ -329,9 +300,9 @@ static int add_ftrace_tramp(unsigned long tramp)
static int setup_mcount_compiler_tramp(unsigned long tramp)
{
int i;
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ptr;
- struct ppc_inst instr;
+ ppc_inst_t instr;
static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
/* Is this a known long jump tramp? */
@@ -396,7 +367,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long tramp, ip = rec->ip;
- struct ppc_inst op;
+ ppc_inst_t op;
/* Read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -436,7 +407,7 @@ int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more than 24 bits away,
@@ -489,7 +460,7 @@ int ftrace_make_nop(struct module *mod,
*/
#ifndef CONFIG_MPROFILE_KERNEL
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/*
* We expect to see:
@@ -507,7 +478,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
}
#else
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())))
@@ -519,8 +490,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op[2];
- struct ppc_inst instr;
+ ppc_inst_t op[2];
+ ppc_inst_t instr;
void *ip = (void *)rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -588,8 +559,10 @@ static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
int err;
- struct ppc_inst op;
+ ppc_inst_t op;
u32 *ip = (u32 *)rec->ip;
+ struct module *mod = rec->arch.mod;
+ unsigned long tramp;
/* read where this goes */
if (copy_inst_from_kernel_nofault(&op, ip))
@@ -602,13 +575,23 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
}
/* If we never set up a trampoline to ftrace_caller, then bail */
- if (!rec->arch.mod->arch.tramp) {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+#else
+ if (!mod->arch.tramp) {
+#endif
pr_err("No ftrace trampoline\n");
return -EINVAL;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (rec->flags & FTRACE_FL_REGS)
+ tramp = mod->arch.tramp_regs;
+ else
+#endif
+ tramp = mod->arch.tramp;
/* create the branch to the trampoline */
- err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ err = create_branch(&op, ip, tramp, BRANCH_SET_LINK);
if (err) {
pr_err("REL24 out of range!\n");
return -EINVAL;
@@ -626,7 +609,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
void *ip = (void *)rec->ip;
unsigned long tramp, entry, ptr;
@@ -674,7 +657,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more than 24 bits away,
@@ -713,7 +696,7 @@ static int
__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ip = rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -807,7 +790,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more than 24 bits away,
@@ -847,7 +830,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
int ret;
old = ppc_inst_read((u32 *)&ftrace_call);
@@ -932,7 +915,7 @@ int ftrace_enable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, stub, 0);
new = ftrace_call_replace(ip, addr, 0);
@@ -945,7 +928,7 @@ int ftrace_disable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, addr, 0);
new = ftrace_call_replace(ip, stub, 0);
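
The PPC32 trampoline parsing removed above reconstructed the branch target from the lis/addi @ha/@l immediates by hand; module_trampoline_target() now hides that. For reference, a small standalone sketch of the @ha/@l split and reconstruction; addr_ha() and addr_l() are hypothetical helper names, not kernel functions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* 16-bit immediate a "lis rN,addr@ha" carries (+0x8000 compensates for the
 * sign extension addi applies to the low half) */
static uint32_t addr_ha(uint32_t addr)
{
	return ((addr + 0x8000u) >> 16) & 0xffffu;
}

/* sign-extended 16-bit immediate an "addi rN,rN,addr@l" adds */
static int32_t addr_l(uint32_t addr)
{
	int32_t lo = addr & 0xffffu;

	return lo >= 0x8000 ? lo - 0x10000 : lo;
}

int main(void)
{
	uint32_t addr = 0xc001a345u;	/* arbitrary example address */

	/* lis builds ha<<16, addi then adds the sign-extended low half */
	assert((uint32_t)((addr_ha(addr) << 16) + addr_l(addr)) == addr);
	printf("@ha=0x%04x @l=%d\n", (unsigned)addr_ha(addr), (int)addr_l(addr));
	return 0;
}
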
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index e023ae59c429..0a02c0cb12d9 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -9,55 +9,135 @@
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
#include <asm/export.h>
+#include <asm/ptrace.h>
_GLOBAL(mcount)
_GLOBAL(_mcount)
/*
* It is required that _mcount on PPC32 must preserve the
- * link register. But we have r0 to play with. We use r0
+ * link register. But we have r12 to play with. We use r12
* to push the return address back to the caller of mcount
* into the ctr register, restore the link register and
* then jump back using the ctr register.
*/
- mflr r0
- mtctr r0
- lwz r0, 4(r1)
+ mflr r12
+ mtctr r12
mtlr r0
bctr
+EXPORT_SYMBOL(_mcount)
_GLOBAL(ftrace_caller)
MCOUNT_SAVE_FRAME
/* r3 ends up with link register */
subi r3, r3, MCOUNT_INSN_SIZE
+ lis r5,function_trace_op@ha
+ lwz r5,function_trace_op@l(r5)
+ li r6, 0
.globl ftrace_call
ftrace_call:
bl ftrace_stub
nop
+ MCOUNT_RESTORE_FRAME
+ftrace_caller_common:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
b ftrace_graph_stub
_GLOBAL(ftrace_graph_stub)
#endif
- MCOUNT_RESTORE_FRAME
/* old link register ends up in ctr reg */
bctr
-EXPORT_SYMBOL(_mcount)
_GLOBAL(ftrace_stub)
blr
+_GLOBAL(ftrace_regs_caller)
+ /* Save the original return address in A's stack frame */
+ stw r0,LRSAVE(r1)
+
+ /* Create our stack frame + pt_regs */
+ stwu r1,-INT_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+ stw r0, GPR0(r1)
+ stmw r2, GPR2(r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, INT_FRAME_SIZE
+ stw r8, GPR1(r1)
+
+ /* Load special regs for save below */
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ mfcr r11
+
+ /* Get the _mcount() call site out of LR */
+ mflr r7
+ /* Save it as pt_regs->nip */
+ stw r7, _NIP(r1)
+ /* Save the read LR in pt_regs->link */
+ stw r0, _LINK(r1)
+
+ lis r3,function_trace_op@ha
+ lwz r5,function_trace_op@l(r3)
+
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r7, MCOUNT_INSN_SIZE
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Save special regs */
+ stw r8, _MSR(r1)
+ stw r9, _CTR(r1)
+ stw r10, _XER(r1)
+ stw r11, _CCR(r1)
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1, STACK_FRAME_OVERHEAD
+
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_regs_call
+ftrace_regs_call:
+ bl ftrace_stub
+ nop
+
+ /* Load ctr with the possibly modified NIP */
+ lwz r3, _NIP(r1)
+ mtctr r3
+
+ /* Restore gprs */
+ lmw r2, GPR2(r1)
+
+ /* Restore possibly modified LR */
+ lwz r0, _LINK(r1)
+ mtlr r0
+
+ /* Pop our stack frame */
+ addi r1, r1, INT_FRAME_SIZE
+
+ b ftrace_caller_common
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+
addi r5, r1, 48
- /* load r4 with local address */
- lwz r4, 44(r1)
+ mfctr r4 /* ftrace_caller has moved local addr here */
+ stw r4, 44(r1)
+ mflr r3 /* ftrace_caller has restored LR from stack */
subi r4, r4, MCOUNT_INSN_SIZE
- /* Grab the LR out of the caller stack frame */
- lwz r3,52(r1)
-
bl prepare_ftrace_return
nop
@@ -66,9 +146,21 @@ _GLOBAL(ftrace_graph_caller)
* Change the LR in the callers stack frame to this.
*/
stw r3,52(r1)
+ mtlr r3
+ lwz r0,44(r1)
+ mtctr r0
+
+ lwz r3, 12(r1)
+ lwz r4, 16(r1)
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+
+ addi r1, r1, 48
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
bctr
_GLOBAL(return_to_handler)
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index f9fd5f743eba..d636fc755f60 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller)
/* Save all gprs to pt_regs */
SAVE_GPR(0, r1)
- SAVE_10GPRS(2, r1)
+ SAVE_GPRS(2, 11, r1)
/* Ok to continue? */
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
beq ftrace_no_trace
- SAVE_10GPRS(12, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_GPRS(12, 31, r1)
/* Save previous stack pointer (r1) */
addi r8, r1, SWITCH_FRAME_SIZE
@@ -108,10 +107,8 @@ ftrace_regs_call:
#endif
/* Restore gprs */
- REST_GPR(0,r1)
- REST_10GPRS(2,r1)
- REST_10GPRS(12,r1)
- REST_10GPRS(22,r1)
+ REST_GPR(0, r1)
+ REST_GPRS(2, 31, r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller)
stdu r1, -SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(3, r1)
+ SAVE_GPRS(3, 10, r1)
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
@@ -194,7 +191,7 @@ ftrace_call:
mtctr r3
/* Restore gprs */
- REST_8GPRS(3,r1)
+ REST_GPRS(3, 10, r1)
/* Restore callee's TOC */
ld r2, 24(r1)
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 8513aa49614e..d3942de254c6 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -84,7 +84,7 @@ static int udbg_uart_getc(void)
return udbg_uart_in(UART_RBR);
}
-static void udbg_use_uart(void)
+static void __init udbg_use_uart(void)
{
udbg_putc = udbg_uart_putc;
udbg_flush = udbg_uart_flush;
@@ -92,7 +92,7 @@ static void udbg_use_uart(void)
udbg_getc_poll = udbg_uart_getc_poll;
}
-void udbg_uart_setup(unsigned int speed, unsigned int clock)
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock)
{
unsigned int dll, base_bauds;
@@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock)
udbg_uart_out(UART_FCR, 0x7);
}
-unsigned int udbg_probe_uart_speed(unsigned int clock)
+unsigned int __init udbg_probe_uart_speed(unsigned int clock)
{
unsigned int dll, dlm, divisor, prescaler, speed;
u8 old_lcr;
@@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data)
outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));
}
-void udbg_uart_init_pio(unsigned long port, unsigned int stride)
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride)
{
if (!port)
return;
@@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data)
}
-void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
{
if (!addr)
return;
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index ae632569446f..fd9432875ebc 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x)
int emulate_altivec(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned int i, word;
unsigned int va, vb, vc, vd;
vector128 *vrs;
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index ba03eedfdcd8..5cc24d8cce94 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state)
*/
_GLOBAL(load_up_altivec)
mfmsr r5 /* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_RI
+#endif
oris r5,r5,MSR_VEC@h
MTMSRD(r5) /* enable use of AltiVec now */
isync
@@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx)
andis. r5,r12,MSR_VEC@h
beql+ load_up_altivec /* skip if already loaded */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ li r5,MSR_RI
+ mtmsrd r5,1
+#endif
+
ld r4,PACACURRENT(r13)
addi r4,r4,THREAD /* Get THREAD */
li r6,1
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 18e42c74abdd..2bcca818136a 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -322,10 +322,6 @@ SECTIONS
#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
-#ifdef CONFIG_UBSAN
- *(.data..Lubsan_data*)
- *(.data..Lubsan_type*)
-#endif
*(.data.rel*)
*(SDATA_MAIN)
*(.sdata2)
@@ -336,24 +332,18 @@ SECTIONS
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
-#ifdef CONFIG_UBSAN
- *(.data..Lubsan_data*)
- *(.data..Lubsan_type*)
-#endif
*(.data.rel*)
*(.toc1)
*(.branch_lt)
}
- . = ALIGN(256);
- .got : AT(ADDR(.got) - LOAD_OFFSET) {
- __toc_start = .;
+ .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+ *(.got)
#ifndef CONFIG_RELOCATABLE
__prom_init_toc_start = .;
- arch/powerpc/kernel/prom_init.o*(.toc .got)
+ arch/powerpc/kernel/prom_init.o*(.toc)
__prom_init_toc_end = .;
#endif
- *(.got)
*(.toc)
}
#endif
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3fa6d240bade..bfc27496fe7e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -85,10 +85,37 @@ static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
static unsigned long __wd_smp_lock;
+static unsigned long __wd_reporting;
+static unsigned long __wd_nmi_output;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
static u64 wd_smp_last_reset_tb;
+/*
+ * Try to take the exclusive watchdog action / NMI IPI / printing lock.
+ * wd_smp_lock must be held. If this fails, we should return and wait
+ * for the watchdog to kick in again (or another CPU to trigger it).
+ *
+ * Importantly, if hardlockup_panic is set, wd_try_report failure should
+ * not delay the panic, because whichever other CPU is reporting will
+ * call panic.
+ */
+static bool wd_try_report(void)
+{
+ if (__wd_reporting)
+ return false;
+ __wd_reporting = 1;
+ return true;
+}
+
+/* End printing after successful wd_try_report. wd_smp_lock not required. */
+static void wd_end_reporting(void)
+{
+ smp_mb(); /* End printing "critical section" */
+ WARN_ON_ONCE(__wd_reporting == 0);
+ WRITE_ONCE(__wd_reporting, 0);
+}
+
static inline void wd_smp_lock(unsigned long *flags)
{
/*
@@ -128,109 +155,182 @@ static void wd_lockup_ipi(struct pt_regs *regs)
else
dump_stack();
+ /*
+ * __wd_nmi_output must be set after we printk from NMI context.
+ *
+ * printk from NMI context defers printing to the console to irq_work.
+ * If that NMI was taken in some code that is hard-locked, then irqs
+ * are disabled so irq_work will never fire. That can result in the
+ * hard lockup messages being delayed (indefinitely, until something
+ * else kicks the console drivers).
+ *
+ * Setting __wd_nmi_output will cause another CPU to notice and kick
+ * the console drivers for us.
+ *
+ * xchg is not needed here (it could be a smp_mb and store), but xchg
+ * gives the memory ordering and atomicity required.
+ */
+ xchg(&__wd_nmi_output, 1);
+
/* Do not panic from here because that can recurse into NMI IPI layer */
}
-static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
+static bool set_cpu_stuck(int cpu)
{
- cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
- cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
+ cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ /*
+ * See wd_smp_clear_cpu_pending()
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
+ return true;
}
-}
-static void set_cpu_stuck(int cpu, u64 tb)
-{
- set_cpumask_stuck(cpumask_of(cpu), tb);
+ return false;
}
-static void watchdog_smp_panic(int cpu, u64 tb)
+static void watchdog_smp_panic(int cpu)
{
+ static cpumask_t wd_smp_cpus_ipi; // protected by reporting
unsigned long flags;
+ u64 tb, last_reset;
int c;
wd_smp_lock(&flags);
/* Double check some things under lock */
- if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+ tb = get_tb();
+ last_reset = wd_smp_last_reset_tb;
+ if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
goto out;
if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
goto out;
- if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+ if (!wd_try_report())
+ goto out;
+ for_each_online_cpu(c) {
+ if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
+ continue;
+ if (c == cpu)
+ continue; // should not happen
+
+ __cpumask_set_cpu(c, &wd_smp_cpus_ipi);
+ if (set_cpu_stuck(c))
+ break;
+ }
+ if (cpumask_empty(&wd_smp_cpus_ipi)) {
+ wd_end_reporting();
goto out;
+ }
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
- cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+ cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
- cpu, tb, wd_smp_last_reset_tb,
- tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000);
+ cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);
if (!sysctl_hardlockup_all_cpu_backtrace) {
/*
* Try to trigger the stuck CPUs, unless we are going to
* get a backtrace on all of them anyway.
*/
- for_each_cpu(c, &wd_smp_cpus_pending) {
- if (c == cpu)
- continue;
+ for_each_cpu(c, &wd_smp_cpus_ipi) {
smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ __cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
}
- }
-
- /* Take the stuck CPUs out of the watch group */
- set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-
- wd_smp_unlock(&flags);
-
- if (sysctl_hardlockup_all_cpu_backtrace)
+ } else {
trigger_allbutself_cpu_backtrace();
-
- /*
- * Force flush any remote buffers that might be stuck in IRQ context
- * and therefore could not run their irq_work.
- */
- printk_trigger_flush();
+ cpumask_clear(&wd_smp_cpus_ipi);
+ }
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+ wd_end_reporting();
+
return;
out:
wd_smp_unlock(&flags);
}
-static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+static void wd_smp_clear_cpu_pending(int cpu)
{
if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
struct pt_regs *regs = get_irq_regs();
unsigned long flags;
- wd_smp_lock(&flags);
-
pr_emerg("CPU %d became unstuck TB:%lld\n",
- cpu, tb);
+ cpu, get_tb());
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
+ } else {
+ /*
+ * The last CPU to clear pending should have reset the
+ * watchdog so we generally should not find it empty
+ * here if our CPU was clear. However it could happen
+ * due to a rare race with another CPU taking the
+ * last CPU out of the mask concurrently.
+ *
+ * We can't add a warning for it. But just in case
+ * there is a problem with the watchdog that is causing
+ * the mask to not be reset, try to kick it along here.
+ */
+ if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
+ goto none_pending;
}
return;
}
+
+ /*
+ * All other updates to wd_smp_cpus_pending are performed under
+ * wd_smp_lock. All of them are atomic except the case where the
+ * mask becomes empty and is reset. This will not happen here because
+ * cpu was tested to be in the bitmap (above), and a CPU only clears
+ * its own bit. _Except_ in the case where another CPU has detected a
+ * hard lockup on our CPU and takes us out of the pending mask. So in
+ * normal operation there will be no race here, no problem.
+ *
+ * In the lockup case, this atomic clear-bit vs a store that refills
+ * other bits in the accessed word will not be a problem. The bit clear
+ * is atomic so it will not cause the store to get lost, and the store
+ * will never set this bit so it will not overwrite the bit clear. The
+ * only way for a stuck CPU to return to the pending bitmap is to
+ * become unstuck itself.
+ */
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+ /*
+ * Order the store to clear pending with the load(s) that check all
+ * words in the pending mask are empty. This orders
+ * with the same barrier on another CPU. This prevents two CPUs
+ * clearing the last 2 pending bits, but neither seeing the other's
+ * store when checking if the mask is empty, and missing an empty
+ * mask, which ends with a false positive.
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
unsigned long flags;
+none_pending:
+ /*
+ * Double check under lock because more than one CPU could see
+ * a clear mask with the lockless check after clearing their
+ * pending bits.
+ */
wd_smp_lock(&flags);
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
@@ -245,10 +345,21 @@ static void watchdog_timer_interrupt(int cpu)
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
- watchdog_smp_panic(cpu, tb);
+ watchdog_smp_panic(cpu);
+
+ if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
+ /*
+ * Something has called printk from NMI context. It might be
+ * stuck, so this triggers a flush that will get that
+ * printk output to the console.
+ *
+ * See wd_lockup_ipi.
+ */
+ printk_trigger_flush();
+ }
}
DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
@@ -267,12 +378,27 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+ /*
+ * Taking wd_smp_lock here means it is a soft-NMI lock, which
+ * means we can't take any regular or irqsafe spin locks while
+ * holding this lock. This is why timers can't printk while
+ * holding the lock.
+ */
wd_smp_lock(&flags);
if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
wd_smp_unlock(&flags);
return 0;
}
- set_cpu_stuck(cpu, tb);
+ if (!wd_try_report()) {
+ wd_smp_unlock(&flags);
+ /* Couldn't report, try again in 100ms */
+ mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
+ return 0;
+ }
+
+ set_cpu_stuck(cpu);
+
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
cpu, (void *)regs->nip);
@@ -283,14 +409,21 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
print_irqtrace_events(current);
show_regs(regs);
- wd_smp_unlock(&flags);
+ xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
if (sysctl_hardlockup_all_cpu_backtrace)
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
+
+ wd_end_reporting();
}
+ /*
+ * We are okay to change DEC in soft_nmi_interrupt because the masked
+ * handler has marked a DEC as pending, so the timer interrupt will be
+ * replayed as soon as local irqs are enabled again.
+ */
if (wd_panic_timeout_tb < 0x7fffffff)
mtspr(SPRN_DEC, wd_panic_timeout_tb);
@@ -318,11 +451,15 @@ void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- u64 tb = get_tb();
+ u64 tb;
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return;
+
+ tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -380,7 +517,7 @@ static void stop_watchdog(void *arg)
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
- wd_smp_clear_cpu_pending(cpu, get_tb());
+ wd_smp_clear_cpu_pending(cpu);
}
static int stop_watchdog_on_cpu(unsigned int cpu)
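
The __wd_reporting flag introduced above serializes lockup reporting: wd_try_report() claims it while wd_smp_lock is held, the slow printing happens with the lock dropped, and wd_end_reporting() releases the claim after a barrier, which is how the watchdog_smp_panic() and soft_nmi_interrupt() hunks use it. Below is a runnable userspace analogue of that pattern; try_report(), end_reporting() and detector() are hypothetical names, and a pthread mutex plus C11 atomics stand in for wd_smp_lock and the plain kernel flag.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* wd_smp_lock stand-in */
static atomic_int reporting;					/* __wd_reporting stand-in */

static bool try_report(void)		/* call with "lock" held */
{
	if (atomic_load(&reporting))
		return false;
	atomic_store(&reporting, 1);
	return true;
}

static void end_reporting(void)		/* lock not required */
{
	atomic_thread_fence(memory_order_seq_cst);	/* end the printing "critical section" */
	atomic_store(&reporting, 0);
}

static void *detector(void *arg)
{
	long cpu = (long)arg;

	pthread_mutex_lock(&lock);
	if (!try_report()) {		/* another thread is already reporting */
		pthread_mutex_unlock(&lock);
		return NULL;
	}
	pthread_mutex_unlock(&lock);

	printf("thread %ld reporting a (simulated) lockup\n", cpu);	/* slow work, lock dropped */

	end_reporting();
	return NULL;
}

int main(void)		/* build with: cc -pthread */
{
	pthread_t t[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, detector, (void *)i);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
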
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index a2242017e55f..8b68d9f91a03 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -185,7 +185,7 @@ void __init reserve_crashkernel(void)
}
}
-int overlaps_crashkernel(unsigned long start, unsigned long size)
+int __init overlaps_crashkernel(unsigned long start, unsigned long size)
{
return (start + size) > crashk_res.start && start <= crashk_res.end;
}
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 66678518b938..635b5fc30b53 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -378,7 +378,7 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Values we need to export to the second kernel via the device tree. */
static unsigned long htab_base;
static unsigned long htab_size;
@@ -420,4 +420,4 @@ static int __init export_htab_values(void)
return 0;
}
late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 6b81c852feab..563e9989a5bf 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -296,7 +296,7 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges)
return ret;
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/**
* add_htab_mem_range - Adds htab range to the given memory ranges list,
* if it exists
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ff581d70f20c..f947b77386a9 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -69,6 +69,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+ select PPC_64S_HASH_MMU
select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
help
Support running unmodified book3s_64 and book3s_32 guest kernels
@@ -130,6 +131,21 @@ config KVM_BOOK3S_HV_EXIT_TIMING
If unsure, say N.
+config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
+ bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT
+ depends on KVM_BOOK3S_HV_POSSIBLE
+ default !EXPERT
+ help
+ Old nested HV capable Linux guests have a bug where they don't
+ reflect the PMU in-use status of their L2 guest to the L0 host
+ while the L2 PMU registers are live. This can result in loss
+ of L2 PMU register state, causing perf to not work correctly in
+ L2 guests.
+
+ Selecting this option for the L0 host implements a workaround for
+ those buggy L1s which saves the L2 state, at the cost of performance
+ in all nested-capable guest entry/exit.
+
config KVM_BOOKE_HV
bool
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 983b8c18bc31..05e003eb5d90 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -374,11 +374,16 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
BEGIN_FTR_SECTION
mtspr SPRN_DAWRX1,r10
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
- mtspr SPRN_PID,r10
/*
- * Switch to host MMU mode
+ * Switch to host MMU mode (we don't have the real host PID but we aren't
+ * going back to userspace).
*/
+ hwsync
+ isync
+
+ mtspr SPRN_PID,r10
+
ld r10, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_KVM(r10)
lwz r10, KVM_HOST_LPID(r10)
@@ -389,6 +394,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
ld r10, KVM_HOST_LPCR(r10)
mtspr SPRN_LPCR,r10
+ isync
+
/*
* Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
* MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 16359525a40f..8cebe5542256 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -57,6 +57,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
preempt_disable();
+ asm volatile("hwsync" ::: "memory");
+ isync();
/* switch the lpid first to avoid running host with unallocated pid */
old_lpid = mfspr(SPRN_LPID);
if (old_lpid != lpid)
@@ -75,6 +77,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
ret = __copy_to_user_inatomic((void __user *)to, from, n);
pagefault_enable();
+ asm volatile("hwsync" ::: "memory");
+ isync();
/* switch the pid first to avoid running host with unallocated pid */
if (quadrant == 1 && pid != old_pid)
mtspr(SPRN_PID, old_pid);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7b74fc0a986b..f64e45d6c0f4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -80,6 +80,7 @@
#include <asm/plpar_wrappers.h>
#include "book3s.h"
+#include "book3s_hv.h"
#define CREATE_TRACE_POINTS
#include "trace_hv.h"
@@ -127,11 +128,6 @@ static bool nested = true;
module_param(nested, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
-static inline bool nesting_enabled(struct kvm *kvm)
-{
- return kvm->arch.nested_enable && kvm_is_radix(kvm);
-}
-
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -276,22 +272,26 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* they should never fail.)
*/
-static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
{
unsigned long flags;
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
spin_lock_irqsave(&vc->stoltb_lock, flags);
- vc->preempt_tb = mftb();
+ vc->preempt_tb = tb;
spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}
-static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb)
{
unsigned long flags;
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
spin_lock_irqsave(&vc->stoltb_lock, flags);
if (vc->preempt_tb != TB_NIL) {
- vc->stolen_tb += mftb() - vc->preempt_tb;
+ vc->stolen_tb += tb - vc->preempt_tb;
vc->preempt_tb = TB_NIL;
}
spin_unlock_irqrestore(&vc->stoltb_lock, flags);
@@ -301,6 +301,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
+ u64 now;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return;
+
+ now = mftb();
/*
* We can test vc->runner without taking the vcore lock,
@@ -309,12 +315,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
* ever sets it to NULL.
*/
if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
- kvmppc_core_end_stolen(vc);
+ kvmppc_core_end_stolen(vc, now);
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) {
- vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
+ vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt;
vcpu->arch.busy_preempt = TB_NIL;
}
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -324,13 +330,19 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
+ u64 now;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return;
+
+ now = mftb();
if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
- kvmppc_core_start_stolen(vc);
+ kvmppc_core_start_stolen(vc, now);
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
- vcpu->arch.busy_preempt = mftb();
+ vcpu->arch.busy_preempt = now;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
@@ -675,6 +687,8 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
u64 p;
unsigned long flags;
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
spin_lock_irqsave(&vc->stoltb_lock, flags);
p = vc->stolen_tb;
if (vc->vcore_state != VCORE_INACTIVE &&
@@ -684,35 +698,30 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
return p;
}
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
- struct kvmppc_vcore *vc)
+static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ unsigned int pcpu, u64 now,
+ unsigned long stolen)
{
struct dtl_entry *dt;
struct lppaca *vpa;
- unsigned long stolen;
- unsigned long core_stolen;
- u64 now;
- unsigned long flags;
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
- now = mftb();
- core_stolen = vcore_stolen_time(vc, now);
- stolen = core_stolen - vcpu->arch.stolen_logged;
- vcpu->arch.stolen_logged = core_stolen;
- spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
- stolen += vcpu->arch.busy_stolen;
- vcpu->arch.busy_stolen = 0;
- spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
+
if (!dt || !vpa)
return;
- memset(dt, 0, sizeof(struct dtl_entry));
+
dt->dispatch_reason = 7;
- dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
- dt->timebase = cpu_to_be64(now + vc->tb_offset);
+ dt->preempt_reason = 0;
+ dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid);
dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
+ dt->ready_to_enqueue_time = 0;
+ dt->waiting_to_ready_time = 0;
+ dt->timebase = cpu_to_be64(now);
+ dt->fault_addr = 0;
dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
+
++dt;
if (dt == vcpu->arch.dtl.pinned_end)
dt = vcpu->arch.dtl.pinned_addr;
@@ -723,6 +732,27 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
}
+static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
+{
+ unsigned long stolen;
+ unsigned long core_stolen;
+ u64 now;
+ unsigned long flags;
+
+ now = mftb();
+
+ core_stolen = vcore_stolen_time(vc, now);
+ stolen = core_stolen - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = core_stolen;
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
+ stolen += vcpu->arch.busy_stolen;
+ vcpu->arch.busy_stolen = 0;
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
+
+ __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
+}
+
/* See if there is a doorbell interrupt pending for a vcpu */
static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
{
@@ -731,6 +761,8 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
if (vcpu->arch.doorbell_request)
return true;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return false;
/*
* Ensure that the read of vcore->dpdes comes after the read
* of vcpu->doorbell_request. This barrier matches the
@@ -900,13 +932,14 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* mode handler is not called but no other threads are in the
* source vcore.
*/
-
- spin_lock(&vcore->lock);
- if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
- vcore->vcore_state != VCORE_INACTIVE &&
- vcore->runner)
- target = vcore->runner;
- spin_unlock(&vcore->lock);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ spin_lock(&vcore->lock);
+ if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ vcore->vcore_state != VCORE_INACTIVE &&
+ vcore->runner)
+ target = vcore->runner;
+ spin_unlock(&vcore->lock);
+ }
return kvm_vcpu_yield_to(target);
}
@@ -1421,6 +1454,43 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+/*
+ * If the lppaca had pmcregs_in_use clear when we exited the guest, then
+ * HFSCR_PM is cleared for next entry. If the guest then tries to access
+ * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM
+ * back in the guest HFSCR will cause the next entry to load the PMU SPRs and
+ * allow the guest access to continue.
+ */
+static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hfscr_permitted & HFSCR_PM))
+ return EMULATE_FAIL;
+
+ vcpu->arch.hfscr |= HFSCR_PM;
+
+ return RESUME_GUEST;
+}
+
+static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB))
+ return EMULATE_FAIL;
+
+ vcpu->arch.hfscr |= HFSCR_EBB;
+
+ return RESUME_GUEST;
+}
+
+static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hfscr_permitted & HFSCR_TM))
+ return EMULATE_FAIL;
+
+ vcpu->arch.hfscr |= HFSCR_TM;
+
+ return RESUME_GUEST;
+}
+
static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -1451,6 +1521,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
run->ready_for_interrupt_injection = 1;
switch (vcpu->arch.trap) {
/* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+ WARN_ON_ONCE(1); /* Should never happen */
+ vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+ fallthrough;
case BOOK3S_INTERRUPT_HV_DECREMENTER:
vcpu->stat.dec_exits++;
r = RESUME_GUEST;
@@ -1575,7 +1649,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
unsigned long vsid;
long err;
- if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) {
r = RESUME_GUEST; /* Just retry if it's the canary */
break;
}
@@ -1702,16 +1777,26 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* to emulate.
* Otherwise, we just generate a program interrupt to the guest.
*/
- case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+ u64 cause = vcpu->arch.hfscr >> 56;
+
r = EMULATE_FAIL;
- if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
- cpu_has_feature(CPU_FTR_ARCH_300))
- r = kvmppc_emulate_doorbell_instr(vcpu);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (cause == FSCR_MSGP_LG)
+ r = kvmppc_emulate_doorbell_instr(vcpu);
+ if (cause == FSCR_PM_LG)
+ r = kvmppc_pmu_unavailable(vcpu);
+ if (cause == FSCR_EBB_LG)
+ r = kvmppc_ebb_unavailable(vcpu);
+ if (cause == FSCR_TM_LG)
+ r = kvmppc_tm_unavailable(vcpu);
+ }
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
}
break;
+ }
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
@@ -1768,6 +1853,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
+ /* These need to go to the nested HV */
+ case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+ vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+ vcpu->stat.dec_exits++;
+ r = RESUME_HOST;
+ break;
/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
@@ -2096,8 +2187,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
* either vcore->dpdes or doorbell_request.
* On POWER8, doorbell_request is 0.
*/
- *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
- vcpu->arch.doorbell_request);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ *val = get_reg_val(id, vcpu->arch.doorbell_request);
+ else
+ *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
break;
case KVM_REG_PPC_VTB:
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
@@ -2238,8 +2331,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
break;
case KVM_REG_PPC_DEC_EXPIRY:
- *val = get_reg_val(id, vcpu->arch.dec_expires +
- vcpu->arch.vcore->tb_offset);
+ *val = get_reg_val(id, vcpu->arch.dec_expires);
break;
case KVM_REG_PPC_ONLINE:
*val = get_reg_val(id, vcpu->arch.online);
@@ -2335,7 +2427,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.pspb = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DPDES:
- vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1;
+ else
+ vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
break;
case KVM_REG_PPC_VTB:
vcpu->arch.vcore->vtb = set_reg_val(id, *val);
@@ -2491,8 +2586,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DEC_EXPIRY:
- vcpu->arch.dec_expires = set_reg_val(id, *val) -
- vcpu->arch.vcore->tb_offset;
+ vcpu->arch.dec_expires = set_reg_val(id, *val);
break;
case KVM_REG_PPC_ONLINE:
i = set_reg_val(id, *val);
@@ -2715,6 +2809,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
#endif
#endif
vcpu->arch.mmcr[0] = MMCR0_FC;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
+ vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
+ }
+
vcpu->arch.ctrl = CTRL_RUNLATCH;
/* default to host PVR, since we can't spoof it */
kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
@@ -2745,6 +2844,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
+ /*
+ * PM, EBB, TM are demand-faulted so start with them clear.
+ */
+ vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM);
+
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -2869,13 +2973,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
unsigned long dec_nsec, now;
now = get_tb();
- if (now > vcpu->arch.dec_expires) {
+ if (now > kvmppc_dec_expires_host_tb(vcpu)) {
/* decrementer has already gone negative */
kvmppc_core_queue_dec(vcpu);
kvmppc_core_prepare_to_enter(vcpu);
return;
}
- dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
+ dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
vcpu->arch.timer_running = 1;
}
@@ -2883,14 +2987,14 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
- struct kvm_vcpu *vcpu)
+ struct kvm_vcpu *vcpu, u64 tb)
{
u64 now;
if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
spin_lock_irq(&vcpu->arch.tbacct_lock);
- now = mftb();
+ now = tb;
vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
vcpu->arch.stolen_logged;
vcpu->arch.busy_preempt = now;
@@ -2945,30 +3049,59 @@ static void kvmppc_release_hwthread(int cpu)
tpaca->kvm_hstate.kvm_split_mode = NULL;
}
+static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
+
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
struct kvm_nested_guest *nested = vcpu->arch.nested;
- cpumask_t *cpu_in_guest;
+ cpumask_t *need_tlb_flush;
int i;
+ if (nested)
+ need_tlb_flush = &nested->need_tlb_flush;
+ else
+ need_tlb_flush = &kvm->arch.need_tlb_flush;
+
cpu = cpu_first_tlb_thread_sibling(cpu);
- if (nested) {
- cpumask_set_cpu(cpu, &nested->need_tlb_flush);
- cpu_in_guest = &nested->cpu_in_guest;
- } else {
- cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
- cpu_in_guest = &kvm->arch.cpu_in_guest;
- }
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step())
+ cpumask_set_cpu(i, need_tlb_flush);
+
/*
- * Make sure setting of bit in need_tlb_flush precedes
- * testing of cpu_in_guest bits. The matching barrier on
- * the other side is the first smp_mb() in kvmppc_run_core().
+ * Make sure setting of bit in need_tlb_flush precedes testing of
+ * cpu_in_guest. The matching barrier on the other side is hwsync
+ * when switching to guest MMU mode, which happens between
+ * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit
+ * being tested.
*/
smp_mb();
+
for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
- i += cpu_tlb_thread_sibling_step())
- if (cpumask_test_cpu(i, cpu_in_guest))
+ i += cpu_tlb_thread_sibling_step()) {
+ struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i);
+
+ if (running == kvm)
smp_call_function_single(i, do_nothing, NULL, 1);
+ }
+}
+
+static void do_migrate_away_vcpu(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct kvm *kvm = vcpu->kvm;
+
+ /*
+ * If the guest has GTSE, it may execute tlbie, so do an eieio; tlbsync;
+ * ptesync sequence on the old CPU before migrating to a new one, in
+ * case we interrupted the guest between a tlbie ; eieio ;
+ * tlbsync; ptesync sequence.
+ *
+ * Otherwise, ptesync is sufficient for ordering tlbiel sequences.
+ */
+ if (kvm->arch.lpcr & LPCR_GTSE)
+ asm volatile("eieio; tlbsync; ptesync");
+ else
+ asm volatile("ptesync");
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
@@ -2994,14 +3127,17 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
* can move around between pcpus. To cope with this, when
* a vcpu moves from one pcpu to another, we need to tell
* any vcpus running on the same core as this vcpu previously
- * ran to flush the TLB. The TLB is shared between threads,
- * so we use a single bit in .need_tlb_flush for all 4 threads.
+ * ran to flush the TLB.
*/
if (prev_cpu != pcpu) {
- if (prev_cpu >= 0 &&
- cpu_first_tlb_thread_sibling(prev_cpu) !=
- cpu_first_tlb_thread_sibling(pcpu))
- radix_flush_cpu(kvm, prev_cpu, vcpu);
+ if (prev_cpu >= 0) {
+ if (cpu_first_tlb_thread_sibling(prev_cpu) !=
+ cpu_first_tlb_thread_sibling(pcpu))
+ radix_flush_cpu(kvm, prev_cpu, vcpu);
+
+ smp_call_function_single(prev_cpu,
+ do_migrate_away_vcpu, vcpu, 1);
+ }
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
else
@@ -3013,7 +3149,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
- struct kvm *kvm = vc->kvm;
cpu = vc->pcpu;
if (vcpu) {
@@ -3024,7 +3159,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
cpu += vcpu->arch.ptid;
vcpu->cpu = vc->pcpu;
vcpu->arch.thread_cpu = cpu;
- cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
@@ -3125,6 +3259,8 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
{
struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
vc->vcore_state = VCORE_PREEMPT;
vc->pcpu = smp_processor_id();
if (vc->num_threads < threads_per_vcore(vc->kvm)) {
@@ -3134,14 +3270,16 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
}
/* Start accumulating stolen time */
- kvmppc_core_start_stolen(vc);
+ kvmppc_core_start_stolen(vc, mftb());
}
static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
{
struct preempted_vcore_list *lp;
- kvmppc_core_end_stolen(vc);
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+ kvmppc_core_end_stolen(vc, mftb());
if (!list_empty(&vc->preempt_list)) {
lp = &per_cpu(preempted_vcores, vc->pcpu);
spin_lock(&lp->lock);
@@ -3268,7 +3406,7 @@ static void prepare_threads(struct kvmppc_vcore *vc)
vcpu->arch.ret = RESUME_GUEST;
else
continue;
- kvmppc_remove_runnable(vc, vcpu);
+ kvmppc_remove_runnable(vc, vcpu, mftb());
wake_up(&vcpu->arch.cpu_run);
}
}
@@ -3287,7 +3425,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
list_del_init(&pvc->preempt_list);
if (pvc->runner == NULL) {
pvc->vcore_state = VCORE_INACTIVE;
- kvmppc_core_end_stolen(pvc);
+ kvmppc_core_end_stolen(pvc, mftb());
}
spin_unlock(&pvc->lock);
continue;
@@ -3296,7 +3434,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&pvc->lock);
continue;
}
- kvmppc_core_end_stolen(pvc);
+ kvmppc_core_end_stolen(pvc, mftb());
pvc->vcore_state = VCORE_PIGGYBACK;
if (cip->total_threads >= target_threads)
break;
@@ -3340,7 +3478,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
*/
spin_unlock(&vc->lock);
/* cancel pending dec exception if dec is positive */
- if (now < vcpu->arch.dec_expires &&
+ if (now < kvmppc_dec_expires_host_tb(vcpu) &&
kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
@@ -3363,7 +3501,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
else
++still_running;
} else {
- kvmppc_remove_runnable(vc, vcpu);
+ kvmppc_remove_runnable(vc, vcpu, mftb());
wake_up(&vcpu->arch.cpu_run);
}
}
@@ -3372,7 +3510,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
kvmppc_vcore_preempt(vc);
} else if (vc->runner) {
vc->vcore_state = VCORE_PREEMPT;
- kvmppc_core_start_stolen(vc);
+ kvmppc_core_start_stolen(vc, mftb());
} else {
vc->vcore_state = VCORE_INACTIVE;
}
@@ -3503,7 +3641,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
- kvmppc_remove_runnable(vc, vcpu);
+ kvmppc_remove_runnable(vc, vcpu, mftb());
wake_up(&vcpu->arch.cpu_run);
}
goto out;
@@ -3748,7 +3886,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_release_hwthread(pcpu + i);
if (sip && sip->napped[i])
kvmppc_ipi_thread(pcpu + i);
- cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
}
spin_unlock(&vc->lock);
@@ -3770,211 +3907,137 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
-static void load_spr_state(struct kvm_vcpu *vcpu)
-{
- mtspr(SPRN_DSCR, vcpu->arch.dscr);
- mtspr(SPRN_IAMR, vcpu->arch.iamr);
- mtspr(SPRN_PSPB, vcpu->arch.pspb);
- mtspr(SPRN_FSCR, vcpu->arch.fscr);
- mtspr(SPRN_TAR, vcpu->arch.tar);
- mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
- mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
- mtspr(SPRN_BESCR, vcpu->arch.bescr);
- mtspr(SPRN_TIDR, vcpu->arch.tid);
- mtspr(SPRN_AMR, vcpu->arch.amr);
- mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
- /*
- * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
- * clear (or hstate set appropriately to catch those registers
- * being clobbered if we take a MCE or SRESET), so those are done
- * later.
- */
-
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
-}
-
-static void store_spr_state(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
-
- vcpu->arch.iamr = mfspr(SPRN_IAMR);
- vcpu->arch.pspb = mfspr(SPRN_PSPB);
- vcpu->arch.fscr = mfspr(SPRN_FSCR);
- vcpu->arch.tar = mfspr(SPRN_TAR);
- vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
- vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
- vcpu->arch.bescr = mfspr(SPRN_BESCR);
- vcpu->arch.tid = mfspr(SPRN_TIDR);
- vcpu->arch.amr = mfspr(SPRN_AMR);
- vcpu->arch.uamor = mfspr(SPRN_UAMOR);
- vcpu->arch.dscr = mfspr(SPRN_DSCR);
-}
-
-/*
- * Privileged (non-hypervisor) host registers to save.
- */
-struct p9_host_os_sprs {
- unsigned long dscr;
- unsigned long tidr;
- unsigned long iamr;
- unsigned long amr;
- unsigned long fscr;
-};
-
-static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
-{
- host_os_sprs->dscr = mfspr(SPRN_DSCR);
- host_os_sprs->tidr = mfspr(SPRN_TIDR);
- host_os_sprs->iamr = mfspr(SPRN_IAMR);
- host_os_sprs->amr = mfspr(SPRN_AMR);
- host_os_sprs->fscr = mfspr(SPRN_FSCR);
-}
-
-/* vcpu guest regs must already be saved */
-static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
- struct p9_host_os_sprs *host_os_sprs)
-{
- mtspr(SPRN_PSPB, 0);
- mtspr(SPRN_UAMOR, 0);
-
- mtspr(SPRN_DSCR, host_os_sprs->dscr);
- mtspr(SPRN_TIDR, host_os_sprs->tidr);
- mtspr(SPRN_IAMR, host_os_sprs->iamr);
-
- if (host_os_sprs->amr != vcpu->arch.amr)
- mtspr(SPRN_AMR, host_os_sprs->amr);
-
- if (host_os_sprs->fscr != vcpu->arch.fscr)
- mtspr(SPRN_FSCR, host_os_sprs->fscr);
-
- /* Save guest CTRL register, set runlatch to 1 */
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, 1);
-}
-
static inline bool hcall_is_xics(unsigned long req)
{
return req == H_EOI || req == H_CPPR || req == H_IPI ||
req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
}
-/*
- * Guest entry for POWER9 and later CPUs.
- */
-static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
- unsigned long lpcr)
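+/*
+ * Advance the yield count in the guest's VPA, if one is registered, and mark
+ * the VPA dirty; the guest uses this count to detect that the vcpu has been
+ * scheduled out and dispatched again.
+ */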
+static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
+{
+ struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+ if (lp) {
+ u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+ lp->yield_count = cpu_to_be32(yield_count);
+ vcpu->arch.vpa.dirty = 1;
+ }
+}
+
+/* call our hypervisor to load up HV regs and go */
+static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ unsigned long host_psscr;
+ unsigned long msr;
+ struct hv_guest_state hvregs;
struct p9_host_os_sprs host_os_sprs;
s64 dec;
- u64 tb;
- int trap, save_pmu;
-
- WARN_ON_ONCE(vcpu->arch.ceded);
+ int trap;
- dec = mfspr(SPRN_DEC);
- tb = mftb();
- if (dec < 0)
- return BOOK3S_INTERRUPT_HV_DECREMENTER;
- local_paca->kvm_hstate.dec_expires = dec + tb;
- if (local_paca->kvm_hstate.dec_expires < time_limit)
- time_limit = local_paca->kvm_hstate.dec_expires;
+ msr = mfmsr();
save_p9_host_os_sprs(&host_os_sprs);
- kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
+ /*
+ * We need to save and restore the guest visible part of the
+ * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+ * doesn't do this for us. Note this is only required on pseries,
+ * since kvmhv_vcpu_entry_p9() below handles it otherwise.
+ */
+ host_psscr = mfspr(SPRN_PSSCR_PR);
- kvmppc_subcore_enter_guest();
+ kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+ if (lazy_irq_pending())
+ return 0;
- vc->entry_exit_map = 1;
- vc->in_guest = 1;
+ if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+ msr = mfmsr(); /* TM restore can update msr */
- if (vcpu->arch.vpa.pinned_addr) {
- struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
- u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
- lp->yield_count = cpu_to_be32(yield_count);
- vcpu->arch.vpa.dirty = 1;
- }
-
- if (cpu_has_feature(CPU_FTR_TM) ||
- cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
- kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+ if (vcpu->arch.psscr != host_psscr)
+ mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
-#ifdef CONFIG_PPC_PSERIES
- if (kvmhv_on_pseries()) {
- barrier();
- if (vcpu->arch.vpa.pinned_addr) {
- struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
- get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
- } else {
- get_lppaca()->pmcregs_in_use = 1;
- }
- barrier();
+ kvmhv_save_hv_regs(vcpu, &hvregs);
+ hvregs.lpcr = lpcr;
+ vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+ hvregs.version = HV_GUEST_STATE_VERSION;
+ if (vcpu->arch.nested) {
+ hvregs.lpid = vcpu->arch.nested->shadow_lpid;
+ hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
+ } else {
+ hvregs.lpid = vcpu->kvm->arch.lpid;
+ hvregs.vcpu_token = vcpu->vcpu_id;
}
-#endif
- kvmhv_load_guest_pmu(vcpu);
-
- msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
- load_fp_state(&vcpu->arch.fp);
-#ifdef CONFIG_ALTIVEC
- load_vr_state(&vcpu->arch.vr);
-#endif
- mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
-
- load_spr_state(vcpu);
+ hvregs.hdec_expiry = time_limit;
/*
- * When setting DEC, we must always deal with irq_work_raise via NMI vs
- * setting DEC. The problem occurs right as we switch into guest mode
- * if a NMI hits and sets pending work and sets DEC, then that will
- * apply to the guest and not bring us back to the host.
+ * When setting DEC, we must always deal with irq_work_raise
+ * via NMI vs setting DEC. The problem occurs right as we
+ * switch into guest mode if a NMI hits and sets pending work
+ * and sets DEC, then that will apply to the guest and not
+ * bring us back to the host.
*
- * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
- * example) and set HDEC to 1? That wouldn't solve the nested hv
- * case which needs to abort the hcall or zero the time limit.
+ * irq_work_raise could check a flag (or possibly LPCR[HDICE]
+ * for example) and set HDEC to 1? That wouldn't solve the
+ * nested hv case which needs to abort the hcall or zero the
+ * time limit.
*
* XXX: Another day's problem.
*/
- mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+ mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb);
+
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ switch_pmu_to_guest(vcpu, &host_os_sprs);
+ trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
+ __pa(&vcpu->arch.regs));
+ kvmhv_restore_hv_return_state(vcpu, &hvregs);
+ switch_pmu_to_host(vcpu, &host_os_sprs);
+ vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+
+ store_vcpu_state(vcpu);
- if (kvmhv_on_pseries()) {
- /*
- * We need to save and restore the guest visible part of the
- * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
- * doesn't do this for us. Note only required if pseries since
- * this is done in kvmhv_vcpu_entry_p9() below otherwise.
- */
- unsigned long host_psscr;
- /* call our hypervisor to load up HV regs and go */
- struct hv_guest_state hvregs;
+ dec = mfspr(SPRN_DEC);
+ if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+ dec = (s32) dec;
+ *tb = mftb();
+ vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset);
- host_psscr = mfspr(SPRN_PSSCR_PR);
- mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
- kvmhv_save_hv_regs(vcpu, &hvregs);
- hvregs.lpcr = lpcr;
- vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
- hvregs.version = HV_GUEST_STATE_VERSION;
- if (vcpu->arch.nested) {
- hvregs.lpid = vcpu->arch.nested->shadow_lpid;
- hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
- } else {
- hvregs.lpid = vcpu->kvm->arch.lpid;
- hvregs.vcpu_token = vcpu->vcpu_id;
- }
- hvregs.hdec_expiry = time_limit;
- mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
- mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
- trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
- __pa(&vcpu->arch.regs));
- kvmhv_restore_hv_return_state(vcpu, &hvregs);
- vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
- vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
- vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
- vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+ timer_rearm_host_dec(*tb);
+
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+ if (vcpu->arch.psscr != host_psscr)
mtspr(SPRN_PSSCR_PR, host_psscr);
+ return trap;
+}
+
+/*
+ * Guest entry for POWER9 and later CPUs.
+ */
+static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr, u64 *tb)
+{
+ u64 next_timer;
+ int trap;
+
+ next_timer = timer_get_next_tb();
+ if (*tb >= next_timer)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+ if (next_timer < time_limit)
+ time_limit = next_timer;
+ else if (*tb >= time_limit) /* nested time limit */
+ return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER;
+
+ vcpu->arch.ceded = 0;
+
+ vcpu_vpa_increment_dispatch(vcpu);
+
+ if (kvmhv_on_pseries()) {
+ trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
@@ -3982,9 +4045,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
+
} else {
+ struct kvm *kvm = vcpu->kvm;
+
kvmppc_xive_push_vcpu(vcpu);
- trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
+
+ __this_cpu_write(cpu_in_guest, kvm);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+ __this_cpu_write(cpu_in_guest, NULL);
+
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -4009,65 +4079,11 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
}
kvmppc_xive_pull_vcpu(vcpu);
- if (kvm_is_radix(vcpu->kvm))
+ if (kvm_is_radix(kvm))
vcpu->arch.slb_max = 0;
}
- dec = mfspr(SPRN_DEC);
- if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
- dec = (s32) dec;
- tb = mftb();
- vcpu->arch.dec_expires = dec + tb;
- vcpu->cpu = -1;
- vcpu->arch.thread_cpu = -1;
-
- store_spr_state(vcpu);
-
- restore_p9_host_os_sprs(vcpu, &host_os_sprs);
-
- msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
- store_fp_state(&vcpu->arch.fp);
-#ifdef CONFIG_ALTIVEC
- store_vr_state(&vcpu->arch.vr);
-#endif
- vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
-
- if (cpu_has_feature(CPU_FTR_TM) ||
- cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
- kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
-
- save_pmu = 1;
- if (vcpu->arch.vpa.pinned_addr) {
- struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
- u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
- lp->yield_count = cpu_to_be32(yield_count);
- vcpu->arch.vpa.dirty = 1;
- save_pmu = lp->pmcregs_in_use;
- }
- /* Must save pmu if this guest is capable of running nested guests */
- save_pmu |= nesting_enabled(vcpu->kvm);
-
- kvmhv_save_guest_pmu(vcpu, save_pmu);
-#ifdef CONFIG_PPC_PSERIES
- if (kvmhv_on_pseries()) {
- barrier();
- get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
- barrier();
- }
-#endif
-
- vc->entry_exit_map = 0x101;
- vc->in_guest = 0;
-
- mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
- mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
-
- kvmhv_load_host_pmu();
-
- kvmppc_subcore_exit_guest();
+ vcpu_vpa_increment_dispatch(vcpu);
return trap;
}
@@ -4132,6 +4148,13 @@ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
return false;
}
+static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
+ return true;
+ return false;
+}
+
/*
* Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
@@ -4142,7 +4165,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
int i;
for_each_runnable_thread(i, vcpu, vc) {
- if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
+ if (kvmppc_vcpu_check_block(vcpu))
return 1;
}
@@ -4159,6 +4182,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
int do_sleep = 1;
u64 block_ns;
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
/* Poll for pending exceptions and ceded state */
cur = start_poll = ktime_get();
if (vc->halt_poll_ns) {
@@ -4355,7 +4380,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
for_each_runnable_thread(i, v, vc) {
kvmppc_core_prepare_to_enter(v);
if (signal_pending(v->arch.run_task)) {
- kvmppc_remove_runnable(vc, v);
+ kvmppc_remove_runnable(vc, v, mftb());
v->stat.signal_exits++;
v->run->exit_reason = KVM_EXIT_INTR;
v->arch.ret = -EINTR;
@@ -4396,7 +4421,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
kvmppc_vcore_end_preempt(vc);
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
- kvmppc_remove_runnable(vc, vcpu);
+ kvmppc_remove_runnable(vc, vcpu, mftb());
vcpu->stat.signal_exits++;
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
@@ -4417,12 +4442,15 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
+ unsigned long flags;
+ u64 tb;
trace_kvmppc_run_vcpu_enter(vcpu);
@@ -4433,16 +4461,11 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc = vcpu->arch.vcore;
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
- vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
- vcpu->arch.busy_preempt = TB_NIL;
vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
- vc->runnable_threads[0] = vcpu;
- vc->n_runnable = 1;
- vc->runner = vcpu;
/* See if the MMU is ready to go */
- if (!kvm->arch.mmu_ready) {
+ if (unlikely(!kvm->arch.mmu_ready)) {
r = kvmhv_setup_mmu(vcpu);
if (r) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -4457,29 +4480,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
kvmppc_update_vpas(vcpu);
- init_vcore_to_run(vc);
- vc->preempt_tb = TB_NIL;
-
preempt_disable();
pcpu = smp_processor_id();
- vc->pcpu = pcpu;
if (kvm_is_radix(kvm))
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
- local_irq_disable();
- hard_irq_disable();
+ /* flags save not required, but irq_pmu has no disable/enable API */
+ powerpc_local_irq_pmu_save(flags);
+
if (signal_pending(current))
goto sigpend;
- if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
+ if (need_resched() || !kvm->arch.mmu_ready)
goto out;
if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
- if (vcpu->arch.doorbell_request) {
- vc->dpdes = 1;
- smp_wmb();
- vcpu->arch.doorbell_request = 0;
- }
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
&vcpu->arch.pending_exceptions))
lpcr |= LPCR_MER;
@@ -4490,16 +4505,23 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
goto out;
}
- kvmppc_clear_host_core(pcpu);
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
- local_paca->kvm_hstate.napping = 0;
- local_paca->kvm_hstate.kvm_split_mode = NULL;
- kvmppc_start_thread(vcpu, vc);
- kvmppc_create_dtl_entry(vcpu, vc);
- trace_kvm_guest_enter(vcpu);
+ tb = mftb();
- vc->vcore_state = VCORE_RUNNING;
- trace_kvmppc_run_core(vc, 0);
+ vcpu->cpu = pcpu;
+ vcpu->arch.thread_cpu = pcpu;
+ vc->pcpu = pcpu;
+ local_paca->kvm_hstate.kvm_vcpu = vcpu;
+ local_paca->kvm_hstate.ptid = 0;
+ local_paca->kvm_hstate.fake_suspend = 0;
+
+ __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
+
+ trace_kvm_guest_enter(vcpu);
guest_enter_irqoff();
@@ -4510,7 +4532,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
/* Tell lockdep that we're about to enable interrupts */
trace_hardirqs_on();
- trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
+ trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
vcpu->arch.trap = trap;
trace_hardirqs_off();
@@ -4521,8 +4543,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
set_irq_happened(trap);
- kvmppc_set_host_core(pcpu);
-
context_tracking_guest_exit();
if (!vtime_accounting_enabled_this_cpu()) {
local_irq_enable();
@@ -4538,9 +4558,10 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
}
vtime_account_guest_exit();
- local_irq_enable();
+ vcpu->cpu = -1;
+ vcpu->arch.thread_cpu = -1;
- cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
+ powerpc_local_irq_pmu_restore(flags);
preempt_enable();
@@ -4550,7 +4571,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
* by L2 and the L1 decrementer is provided in hdec_expires
*/
if (kvmppc_core_pending_dec(vcpu) &&
- ((get_tb() < vcpu->arch.dec_expires) ||
+ ((tb < kvmppc_dec_expires_host_tb(vcpu)) ||
(trap == BOOK3S_INTERRUPT_SYSCALL &&
kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
kvmppc_core_dequeue_dec(vcpu);
@@ -4565,28 +4586,31 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
}
vcpu->arch.ret = r;
- if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
- !kvmppc_vcpu_woken(vcpu)) {
+ if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) {
kvmppc_set_timer(vcpu);
- while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
+
+ prepare_to_rcuwait(wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current)) {
vcpu->stat.signal_exits++;
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
break;
}
- spin_lock(&vc->lock);
- kvmppc_vcore_blocked(vc);
- spin_unlock(&vc->lock);
+
+ if (kvmppc_vcpu_check_block(vcpu))
+ break;
+
+ trace_kvmppc_vcore_blocked(vc, 0);
+ schedule();
+ trace_kvmppc_vcore_blocked(vc, 1);
}
+ finish_rcuwait(wait);
}
vcpu->arch.ceded = 0;
- vc->vcore_state = VCORE_INACTIVE;
- trace_kvmppc_run_core(vc, 1);
-
done:
- kvmppc_remove_runnable(vc, vcpu);
trace_kvmppc_run_vcpu_exit(vcpu);
return vcpu->arch.ret;
@@ -4596,7 +4620,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
out:
- local_irq_enable();
+ powerpc_local_irq_pmu_restore(flags);
preempt_enable();
goto done;
}
@@ -4606,23 +4630,25 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
int r;
int srcu_idx;
- unsigned long ebb_regs[3] = {}; /* shut up GCC */
- unsigned long user_tar = 0;
- unsigned int user_vrsave;
struct kvm *kvm;
+ unsigned long msr;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
+ /* No need to go into the guest when all we'll do is come back out */
+ if (signal_pending(current)) {
+ run->exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Don't allow entry with a suspended transaction, because
* the guest entry/exit code will lose it.
- * If the guest has TM enabled, save away their TM-related SPRs
- * (they will get restored by the TM unavailable interrupt).
*/
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
(current->thread.regs->msr & MSR_TM)) {
if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
@@ -4630,12 +4656,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
run->fail_entry.hardware_entry_failure_reason = 0;
return -EINVAL;
}
- /* Enable TM so we can read the TM SPRs */
- mtmsr(mfmsr() | MSR_TM);
- current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
- current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
- current->thread.tm_texasr = mfspr(SPRN_TEXASR);
- current->thread.regs->msr &= ~MSR_TM;
}
#endif
@@ -4650,29 +4670,30 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
kvmppc_core_prepare_to_enter(vcpu);
- /* No need to go into the guest when all we'll do is come back out */
- if (signal_pending(current)) {
- run->exit_reason = KVM_EXIT_INTR;
- return -EINTR;
- }
-
kvm = vcpu->kvm;
atomic_inc(&kvm->arch.vcpus_running);
/* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
smp_mb();
- flush_all_to_thread(current);
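+ /*
+ * Enable the FP/VEC/VSX facilities (and TM, where the guest may use
+ * it) so user register state can be saved and guest state loaded
+ * without further MSR updates.
+ */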
+ msr = 0;
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ msr |= MSR_FP;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ msr |= MSR_VEC;
+ if (cpu_has_feature(CPU_FTR_VSX))
+ msr |= MSR_VSX;
+ if ((cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+ (vcpu->arch.hfscr & HFSCR_TM))
+ msr |= MSR_TM;
+ msr = msr_check_and_set(msr);
- /* Save userspace EBB and other register values */
- if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
- ebb_regs[0] = mfspr(SPRN_EBBHR);
- ebb_regs[1] = mfspr(SPRN_EBBRR);
- ebb_regs[2] = mfspr(SPRN_BESCR);
- user_tar = mfspr(SPRN_TAR);
- }
- user_vrsave = mfspr(SPRN_VRSAVE);
+ kvmppc_save_user_regs();
- vcpu->arch.waitp = &vcpu->arch.vcore->wait;
+ kvmppc_save_current_sprs();
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ vcpu->arch.waitp = &vcpu->arch.vcore->wait;
vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -4711,15 +4732,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
- /* Restore userspace EBB and other register values */
- if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
- mtspr(SPRN_EBBHR, ebb_regs[0]);
- mtspr(SPRN_EBBRR, ebb_regs[1]);
- mtspr(SPRN_BESCR, ebb_regs[2]);
- mtspr(SPRN_TAR, user_tar);
- }
- mtspr(SPRN_VRSAVE, user_vrsave);
-
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&kvm->arch.vcpus_running);
@@ -4861,8 +4873,12 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
unsigned long npages = mem->memory_size >> PAGE_SHIFT;
if (change == KVM_MR_CREATE) {
- slot->arch.rmap = vzalloc(array_size(npages,
- sizeof(*slot->arch.rmap)));
+ unsigned long size = array_size(npages, sizeof(*slot->arch.rmap));
+
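+ /* Reject allocations that clearly exceed total RAM before trying vzalloc() */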
+ if ((size >> PAGE_SHIFT) > totalram_pages())
+ return -ENOMEM;
+
+ slot->arch.rmap = vzalloc(size);
if (!slot->arch.rmap)
return -ENOMEM;
}
@@ -5072,6 +5088,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
*/
int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
{
+ unsigned long lpcr, lpcr_mask;
+
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
kvmppc_rmap_reset(kvm);
@@ -5081,8 +5099,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
kvm->arch.radix = 0;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_radix(kvm);
- kvmppc_update_lpcr(kvm, LPCR_VPM1,
- LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
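+ /* ISA v3.1 adds LPCR[HAIL]; include it in the mask so HPT guests run with it clear */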
+ lpcr = LPCR_VPM1;
+ lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ lpcr_mask |= LPCR_HAIL;
+ kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
return 0;
}
@@ -5092,6 +5115,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
*/
int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
{
+ unsigned long lpcr, lpcr_mask;
int err;
err = kvmppc_init_vm_radix(kvm);
@@ -5103,8 +5127,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
kvm->arch.radix = 1;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_hpt(&kvm->arch.hpt);
- kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
- LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
+ lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ lpcr_mask |= LPCR_HAIL;
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ (kvm->arch.host_lpcr & LPCR_HAIL))
+ lpcr |= LPCR_HAIL;
+ }
+ kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
return 0;
}
@@ -5126,6 +5159,9 @@ void kvmppc_alloc_host_rm_ops(void)
int cpu, core;
int size;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return;
+
/* Not the first time here ? */
if (kvmppc_host_rm_ops_hv != NULL)
return;
@@ -5268,6 +5304,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm->arch.mmu_ready = 1;
lpcr &= ~LPCR_VPM1;
lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_31) &&
+ (kvm->arch.host_lpcr & LPCR_HAIL))
+ lpcr |= LPCR_HAIL;
ret = kvmppc_init_vm_radix(kvm);
if (ret) {
kvmppc_free_lpid(kvm->arch.lpid);
@@ -6063,9 +6103,11 @@ static int kvmppc_book3s_init_hv(void)
if (r)
return r;
- r = kvm_init_subcore_bitmap();
- if (r)
- return r;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ r = kvm_init_subcore_bitmap();
+ if (r)
+ return r;
+ }
/*
* We need a way of accessing the XICS interrupt controller,
diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h
new file mode 100644
index 000000000000..6b7f07d9026b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.h
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+ unsigned long iamr;
+ unsigned long amr;
+
+ unsigned int pmc1;
+ unsigned int pmc2;
+ unsigned int pmc3;
+ unsigned int pmc4;
+ unsigned int pmc5;
+ unsigned int pmc6;
+ unsigned long mmcr0;
+ unsigned long mmcr1;
+ unsigned long mmcr2;
+ unsigned long mmcr3;
+ unsigned long mmcra;
+ unsigned long siar;
+ unsigned long sier1;
+ unsigned long sier2;
+ unsigned long sier3;
+ unsigned long sdar;
+};
+
+static inline bool nesting_enabled(struct kvm *kvm)
+{
+ return kvm->arch.nested_enable && kvm_is_radix(kvm);
+}
+
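+/* These helpers are implemented in book3s_hv_p9_entry.c and shared with book3s_hv.c */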
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
+void store_vcpu_state(struct kvm_vcpu *vcpu);
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs);
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 70b7a8f97153..7d6d91338c3f 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -649,6 +649,8 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
int ext;
unsigned long lpcr;
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
/* Insert EXTERNAL bit into LPCR at the MER bit position */
ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
lpcr = mfspr(SPRN_LPCR);
@@ -682,60 +684,23 @@ static void flush_guest_tlb(struct kvm *kvm)
unsigned long rb, set;
rb = PPC_BIT(52); /* IS = 2 */
- if (kvm_is_radix(kvm)) {
- /* R=1 PRS=1 RIC=2 */
+ for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+ /* R=0 PRS=0 RIC=0 */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
- : : "r" (rb), "i" (1), "i" (1), "i" (2),
+ : : "r" (rb), "i" (0), "i" (0), "i" (0),
"r" (0) : "memory");
- for (set = 1; set < kvm->arch.tlb_sets; ++set) {
- rb += PPC_BIT(51); /* increment set number */
- /* R=1 PRS=1 RIC=0 */
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
- : : "r" (rb), "i" (1), "i" (1), "i" (0),
- "r" (0) : "memory");
- }
- asm volatile("ptesync": : :"memory");
- // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
- asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
- } else {
- for (set = 0; set < kvm->arch.tlb_sets; ++set) {
- /* R=0 PRS=0 RIC=0 */
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
- : : "r" (rb), "i" (0), "i" (0), "i" (0),
- "r" (0) : "memory");
- rb += PPC_BIT(51); /* increment set number */
- }
- asm volatile("ptesync": : :"memory");
- // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+ rb += PPC_BIT(51); /* increment set number */
}
+ asm volatile("ptesync": : :"memory");
}
-void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
- struct kvm_nested_guest *nested)
+void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu)
{
- cpumask_t *need_tlb_flush;
-
- /*
- * On POWER9, individual threads can come in here, but the
- * TLB is shared between the 4 threads in a core, hence
- * invalidating on one thread invalidates for all.
- * Thus we make all 4 threads use the same bit.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu = cpu_first_tlb_thread_sibling(pcpu);
-
- if (nested)
- need_tlb_flush = &nested->need_tlb_flush;
- else
- need_tlb_flush = &kvm->arch.need_tlb_flush;
-
- if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+ if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) {
flush_guest_tlb(kvm);
/* Clear the bit after the TLB flush */
- cpumask_clear_cpu(pcpu, need_tlb_flush);
+ cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush);
}
}
EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
index 9af660476314..1ec50c69678b 100644
--- a/arch/powerpc/kvm/book3s_hv_hmi.c
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
@@ -20,10 +20,15 @@ void wait_for_subcore_guest_exit(void)
/*
* NULL bitmap pointer indicates that KVM module hasn't
- * been loaded yet and hence no guests are running.
+ * been loaded yet and hence no guests are running, or that we are
+ * running on a POWER9 or newer CPU.
+ *
* If no KVM is in use, no need to co-ordinate among threads
* as all of them will always be in host and no one is going
* to modify TB other than the opal hmi handler.
+ *
+ * POWER9 and newer don't need this synchronisation.
+ *
* Hence, just return from here.
*/
if (!local_paca->sibling_subcore_state)
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 4444f83cb133..59d89e4b154a 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtlr r0
blr
-_GLOBAL(kvmhv_save_host_pmu)
+/*
+ * void kvmhv_save_host_pmu(void)
+ */
+kvmhv_save_host_pmu:
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
li r3, -1
@@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_MMCR3
- mfspr r6, SPRN_SIER2
- mfspr r7, SPRN_SIER3
- std r5, HSTATE_MMCR3(r13)
- std r6, HSTATE_SIER2(r13)
- std r7, HSTATE_SIER3(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index ed8a2c9f5629..a2e34efb8d31 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -358,6 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* convert TB values/offsets to host (L0) values */
hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
vc->tb_offset += l2_hv.tb_offset;
+ vcpu->arch.dec_expires += l2_hv.tb_offset;
/* set L1 state to L2 state */
vcpu->arch.nested = l2;
@@ -374,11 +375,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
do {
- if (mftb() >= hdec_exp) {
- vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
- r = RESUME_HOST;
- break;
- }
r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
} while (is_kvmppc_resume_guest(r));
@@ -399,6 +395,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
if (l2_regs.msr & MSR_TS_MASK)
vcpu->arch.shregs.msr |= MSR_TS_S;
vc->tb_offset = saved_l1_hv.tb_offset;
+ /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
+ vcpu->arch.dec_expires -= l2_hv.tb_offset;
restore_hv_regs(vcpu, &saved_l1_hv);
vcpu->arch.purr += delta_purr;
vcpu->arch.spurr += delta_spurr;
@@ -582,7 +580,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
if (eaddr & (0xFFFUL << 52))
return H_PARAMETER;
- buf = kzalloc(n, GFP_KERNEL);
+ buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
if (!buf)
return H_NO_MEM;
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 961b3d70483c..a28e5b3daabd 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -4,8 +4,439 @@
#include <asm/asm-prototypes.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
+#include <asm/pmc.h>
#include <asm/ppc-opcode.h>
+#include "book3s_hv.h"
+
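+/*
+ * Check whether the PMU is already in the expected frozen state (MMCR0[FC]
+ * set, sampling disabled and, on ISA v3.1, PMCCEXT set and BHRB disabled);
+ * if not, freeze it before the counters are switched.
+ */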
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+ if (!(mmcr0 & MMCR0_FC))
+ goto do_freeze;
+ if (mmcra & MMCRA_SAMPLE_ENABLE)
+ goto do_freeze;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (!(mmcr0 & MMCR0_PMCCEXT))
+ goto do_freeze;
+ if (!(mmcra & MMCRA_BHRB_DISABLE))
+ goto do_freeze;
+ }
+ return;
+
+do_freeze:
+ mmcr0 = MMCR0_FC;
+ mmcra = 0;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mmcr0 |= MMCR0_PMCCEXT;
+ mmcra = MMCRA_BHRB_DISABLE;
+ }
+
+ mtspr(SPRN_MMCR0, mmcr0);
+ mtspr(SPRN_MMCRA, mmcra);
+ isync();
+}
+
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ struct lppaca *lp;
+ int load_pmu = 1;
+
+ lp = vcpu->arch.vpa.pinned_addr;
+ if (lp)
+ load_pmu = lp->pmcregs_in_use;
+
+ /* Save host */
+ if (ppc_get_pmu_inuse()) {
+ /*
+ * It might be better to put PMU handling (at least for the
+ * host) in the perf subsystem because it knows more about what
+ * is being used.
+ */
+
+ /* POWER9, POWER10 do not implement HPMC or SPMC */
+
+ host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+ host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+ freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+ host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+ host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+ host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+ host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+ host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+ host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+ host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+ host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+ host_os_sprs->sdar = mfspr(SPRN_SDAR);
+ host_os_sprs->siar = mfspr(SPRN_SIAR);
+ host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+ host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+ host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+ }
+ }
+
+#ifdef CONFIG_PPC_PSERIES
+ /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */
+ if (kvmhv_on_pseries()) {
+ barrier();
+ get_lppaca()->pmcregs_in_use = load_pmu;
+ barrier();
+ }
+#endif
+
+ /*
+ * Load guest. If the VPA said the PMCs are not in use but the guest
+ * tried to access them anyway, HFSCR[PM] will be set by the HFAC
+ * fault so we can make forward progress.
+ */
+ if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) {
+ mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+ mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+ mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+ mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+ mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+ mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+ mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+ mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+ mtspr(SPRN_SDAR, vcpu->arch.sdar);
+ mtspr(SPRN_SIAR, vcpu->arch.siar);
+ mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
+ mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+ mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+ }
+
+ /* Set MMCRA then MMCR0 last */
+ mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+ mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+ /* No isync necessary because we're starting counters */
+
+ if (!vcpu->arch.nested &&
+ (vcpu->arch.hfscr_permitted & HFSCR_PM))
+ vcpu->arch.hfscr |= HFSCR_PM;
+ }
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_guest);
+
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ struct lppaca *lp;
+ int save_pmu = 1;
+
+ lp = vcpu->arch.vpa.pinned_addr;
+ if (lp)
+ save_pmu = lp->pmcregs_in_use;
+ if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) {
+ /*
+ * Save pmu if this guest is capable of running nested guests.
+ * This option is for old L1s that do not set their
+ * lppaca->pmcregs_in_use properly when entering their L2.
+ */
+ save_pmu |= nesting_enabled(vcpu->kvm);
+ }
+
+ if (save_pmu) {
+ vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+ vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+ freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+ vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+ vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+ vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+ vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+ vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+ vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+ vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+ vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+ vcpu->arch.sdar = mfspr(SPRN_SDAR);
+ vcpu->arch.siar = mfspr(SPRN_SIAR);
+ vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+ vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+ vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+ }
+
+ } else if (vcpu->arch.hfscr & HFSCR_PM) {
+ /*
+ * The guest accessed PMC SPRs without specifying they should
+ * be preserved, or it cleared pmcregs_in_use after the last
+ * access. Just ensure they are frozen.
+ */
+ freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+
+ /*
+ * Demand-fault PMU register access in the guest.
+ *
+ * This is used to grab the guest's VPA pmcregs_in_use value
+ * and reflect it into the host's VPA in the case of a nested
+ * hypervisor.
+ *
+ * It also avoids having to zero out SPRs after each guest
+ * exit to prevent side-channels.
+ *
+ * This is cleared here when we exit the guest, so later HFSCR
+ * interrupt handling can add it back to run the guest with
+ * PM enabled next time.
+ */
+ if (!vcpu->arch.nested)
+ vcpu->arch.hfscr &= ~HFSCR_PM;
+ } /* otherwise the PMU should still be frozen */
+
+#ifdef CONFIG_PPC_PSERIES
+ if (kvmhv_on_pseries()) {
+ barrier();
+ get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+ barrier();
+ }
+#endif
+
+ if (ppc_get_pmu_inuse()) {
+ mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+ mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+ mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+ mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+ mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+ mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+ mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+ mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+ mtspr(SPRN_SDAR, host_os_sprs->sdar);
+ mtspr(SPRN_SIAR, host_os_sprs->siar);
+ mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+ mtspr(SPRN_SIER2, host_os_sprs->sier2);
+ mtspr(SPRN_SIER3, host_os_sprs->sier3);
+ }
+
+ /* Set MMCRA then MMCR0 last */
+ mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+ mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+ isync();
+ }
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_host);
+
+static void load_spr_state(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ /* TAR is very fast */
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+ current->thread.vrsave != vcpu->arch.vrsave)
+ mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
+
+ if (vcpu->arch.hfscr & HFSCR_EBB) {
+ if (current->thread.ebbhr != vcpu->arch.ebbhr)
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ if (current->thread.ebbrr != vcpu->arch.ebbrr)
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ if (current->thread.bescr != vcpu->arch.bescr)
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+ current->thread.tidr != vcpu->arch.tid)
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ if (host_os_sprs->iamr != vcpu->arch.iamr)
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ if (vcpu->arch.uamor != 0)
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+ if (current->thread.fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ if (current->thread.dscr != vcpu->arch.dscr)
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ if (vcpu->arch.pspb != 0)
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+
+ /*
+ * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+ * clear (or hstate set appropriately to catch those registers
+ * being clobbered if we take a MCE or SRESET), so those are done
+ * later.
+ */
+
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 0);
+}
+
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+
+ if (vcpu->arch.hfscr & HFSCR_EBB) {
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TIDR))
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+}
+
+/* Returns true if current MSR and/or guest MSR may have changed */
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ bool ret = false;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ unsigned long guest_msr = vcpu->arch.shregs.msr;
+ if (MSR_TM_ACTIVE(guest_msr)) {
+ kvmppc_restore_tm_hv(vcpu, guest_msr, true);
+ ret = true;
+ } else if (vcpu->arch.hfscr & HFSCR_TM) {
+ mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+ mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+ mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+ }
+ }
+#endif
+
+ load_spr_state(vcpu, host_os_sprs);
+
+ load_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+ load_vr_state(&vcpu->arch.vr);
+#endif
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(load_vcpu_state);
+
+void store_vcpu_state(struct kvm_vcpu *vcpu)
+{
+ store_spr_state(vcpu);
+
+ store_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+ store_vr_state(&vcpu->arch.vr);
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ unsigned long guest_msr = vcpu->arch.shregs.msr;
+ if (MSR_TM_ACTIVE(guest_msr)) {
+ kvmppc_save_tm_hv(vcpu, guest_msr, true);
+ } else if (vcpu->arch.hfscr & HFSCR_TM) {
+ vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+ vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+ vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+
+ if (!vcpu->arch.nested) {
+ vcpu->arch.load_tm++; /* see load_ebb comment */
+ if (!vcpu->arch.load_tm)
+ vcpu->arch.hfscr &= ~HFSCR_TM;
+ }
+ }
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(store_vcpu_state);
+
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+ host_os_sprs->iamr = mfspr(SPRN_IAMR);
+ host_os_sprs->amr = mfspr(SPRN_AMR);
+}
+EXPORT_SYMBOL_GPL(save_p9_host_os_sprs);
+
+/* vcpu guest regs must already be saved */
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ /*
+ * current->thread.xxx registers must all be restored to host
+ * values before a potential context switch, otherwise the context
+ * switch itself will overwrite current->thread.xxx with the values
+ * from the guest SPRs.
+ */
+
+ mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
+
+ if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+ current->thread.tidr != vcpu->arch.tid)
+ mtspr(SPRN_TIDR, current->thread.tidr);
+ if (host_os_sprs->iamr != vcpu->arch.iamr)
+ mtspr(SPRN_IAMR, host_os_sprs->iamr);
+ if (vcpu->arch.uamor != 0)
+ mtspr(SPRN_UAMOR, 0);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_os_sprs->amr);
+ if (current->thread.fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ if (current->thread.dscr != vcpu->arch.dscr)
+ mtspr(SPRN_DSCR, current->thread.dscr);
+ if (vcpu->arch.pspb != 0)
+ mtspr(SPRN_PSPB, 0);
+
+ /* Save guest CTRL register, set runlatch to 1 */
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 1);
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+ vcpu->arch.vrsave != current->thread.vrsave)
+ mtspr(SPRN_VRSAVE, current->thread.vrsave);
+#endif
+ if (vcpu->arch.hfscr & HFSCR_EBB) {
+ if (vcpu->arch.bescr != current->thread.bescr)
+ mtspr(SPRN_BESCR, current->thread.bescr);
+ if (vcpu->arch.ebbhr != current->thread.ebbhr)
+ mtspr(SPRN_EBBHR, current->thread.ebbhr);
+ if (vcpu->arch.ebbrr != current->thread.ebbrr)
+ mtspr(SPRN_EBBRR, current->thread.ebbrr);
+
+ if (!vcpu->arch.nested) {
+ /*
+ * Like load_fp in the context switch code: turn off the
+ * facility once the u8 counter wraps, to try to avoid
+ * saving and restoring these registers on every partition
+ * switch.
+ */
+ vcpu->arch.load_ebb++;
+ if (!vcpu->arch.load_ebb)
+ vcpu->arch.hfscr &= ~HFSCR_EBB;
+ }
+ }
+
+ if (vcpu->arch.tar != current->thread.tar)
+ mtspr(SPRN_TAR, current->thread.tar);
+}
+EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs);
+
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
{
@@ -56,10 +487,22 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
#define accumulate_time(vcpu, next) do {} while (0)
#endif
-static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+static inline u64 mfslbv(unsigned int idx)
{
- asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
- asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
+ u64 slbev;
+
+ asm volatile("slbmfev %0,%1" : "=r" (slbev) : "r" (idx));
+
+ return slbev;
+}
+
+static inline u64 mfslbe(unsigned int idx)
+{
+ u64 slbee;
+
+ asm volatile("slbmfee %0,%1" : "=r" (slbee) : "r" (idx));
+
+ return slbee;
}
static inline void mtslb(u64 slbee, u64 slbev)
@@ -100,17 +543,19 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6
lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
/*
- * All the isync()s are overkill but trivially follow the ISA
- * requirements. Some can likely be replaced with justification
- * comment for why they are not needed.
+ * Prior memory accesses to host PID Q3 must be completed before we
+ * start switching, and stores must be drained to avoid not-my-LPAR
+ * logic (see switch_mmu_to_host).
*/
+ asm volatile("hwsync" ::: "memory");
isync();
mtspr(SPRN_LPID, lpid);
- isync();
mtspr(SPRN_LPCR, lpcr);
- isync();
mtspr(SPRN_PID, vcpu->arch.pid);
- isync();
+ /*
+ * isync not required here because we are HRFID'ing to guest before
+ * any guest context access, which is context synchronising.
+ */
}
static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
@@ -120,25 +565,41 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64
lpid = kvm->arch.lpid;
+ /*
+ * See switch_mmu_to_guest_radix. ptesync should not be required here
+ * even if the host is in HPT mode because speculative accesses would
+ * not cause RC updates (we are in real mode).
+ */
+ asm volatile("hwsync" ::: "memory");
+ isync();
mtspr(SPRN_LPID, lpid);
mtspr(SPRN_LPCR, lpcr);
mtspr(SPRN_PID, vcpu->arch.pid);
for (i = 0; i < vcpu->arch.slb_max; i++)
mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
-
- isync();
+ /*
+ * isync not required here, see switch_mmu_to_guest_radix.
+ */
}
static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
{
+ /*
+ * The guest has exited, so guest MMU context is no longer being
+ * non-speculatively accessed, but a hwsync is needed before the
+ * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
+ * so the not-my-LPAR tlbie logic does not overlook them.
+ */
+ asm volatile("hwsync" ::: "memory");
isync();
mtspr(SPRN_PID, pid);
- isync();
mtspr(SPRN_LPID, kvm->arch.host_lpid);
- isync();
mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
- isync();
+ /*
+ * isync is not required after the switch, because mtmsrd with L=0
+ * is performed after this switch, which is context synchronising.
+ */
if (!radix_enabled())
slb_restore_bolted_realmode();
@@ -171,8 +632,10 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
*/
for (i = 0; i < vcpu->arch.slb_nr; i++) {
u64 slbee, slbev;
- mfslb(i, &slbee, &slbev);
+
+ slbee = mfslbe(i);
if (slbee & SLB_ESID_V) {
+ slbev = mfslbv(i);
vcpu->arch.slb[nr].orige = slbee | i;
vcpu->arch.slb[nr].origv = slbev;
nr++;
@@ -183,15 +646,128 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
}
}
-int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
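+/*
+ * Flush this core's guest TLB with tlbiel, set by set: R=1 PRS=1 for radix
+ * guests (RIC=2 on the first set), R=0 PRS=0 for HPT guests, followed by a
+ * ptesync and an ERAT flush, since congruence-class tlbiel leaves the ERAT.
+ */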
+static void flush_guest_tlb(struct kvm *kvm)
{
+ unsigned long rb, set;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ if (kvm_is_radix(kvm)) {
+ /* R=1 PRS=1 RIC=2 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (2),
+ "r" (0) : "memory");
+ for (set = 1; set < kvm->arch.tlb_sets; ++set) {
+ rb += PPC_BIT(51); /* increment set number */
+ /* R=1 PRS=1 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (0),
+ "r" (0) : "memory");
+ }
+ asm volatile("ptesync": : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
+ } else {
+ for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+ /* R=0 PRS=0 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (0), "i" (0), "i" (0),
+ "r" (0) : "memory");
+ rb += PPC_BIT(51); /* increment set number */
+ }
+ asm volatile("ptesync": : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+ }
+}
+
+static void check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested)
+{
+ cpumask_t *need_tlb_flush;
+ bool all_set = true;
+ int i;
+
+ if (nested)
+ need_tlb_flush = &nested->need_tlb_flush;
+ else
+ need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+ if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush)))
+ return;
+
+ /*
+ * Individual threads can come in here, but the TLB is shared between
+ * the 4 threads in a core, hence invalidating on one thread
+ * invalidates for all, so only invalidate the first time (if all bits
+ * were set). The others must still execute a ptesync.
+ *
+ * If a race occurs and two threads do the TLB flush, that is not a
+ * problem, just sub-optimal.
+ */
+ for (i = cpu_first_tlb_thread_sibling(pcpu);
+ i <= cpu_last_tlb_thread_sibling(pcpu);
+ i += cpu_tlb_thread_sibling_step()) {
+ if (!cpumask_test_cpu(i, need_tlb_flush)) {
+ all_set = false;
+ break;
+ }
+ }
+ if (all_set)
+ flush_guest_tlb(kvm);
+ else
+ asm volatile("ptesync" ::: "memory");
+
+ /* Clear the bit after the TLB flush */
+ cpumask_clear_cpu(pcpu, need_tlb_flush);
+}
+
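+/*
+ * Hard-disable interrupts and make sure the facility bits the entry path
+ * relies on (FP/VEC/VSX, and TM when the guest may use it) are set in the
+ * MSR, using a single mtmsrd where one is needed. Returns the updated MSR.
+ */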
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr)
+{
+ unsigned long msr_needed = 0;
+
+ msr &= ~MSR_EE;
+
+ /* MSR bits may have been cleared by context switch so must recheck */
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ msr_needed |= MSR_FP;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ msr_needed |= MSR_VEC;
+ if (cpu_has_feature(CPU_FTR_VSX))
+ msr_needed |= MSR_VSX;
+ if ((cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+ (vcpu->arch.hfscr & HFSCR_TM))
+ msr_needed |= MSR_TM;
+
+ /*
+ * This could be combined with MSR[RI] clearing, but that expands
+ * the unrecoverable window. It would be better to cover the unrecoverable
+ * window with KVM's bad interrupt handling rather than use MSR[RI] at all.
+ *
+ * Much more difficult and less worthwhile to combine with IR/DR
+ * disable.
+ */
+ if ((msr & msr_needed) != msr_needed) {
+ msr |= msr_needed;
+ __mtmsrd(msr, 0);
+ } else {
+ __hard_irq_disable();
+ }
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ return msr;
+}
+EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
+{
+ struct p9_host_os_sprs host_os_sprs;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- s64 hdec;
- u64 tb, purr, spurr;
+ s64 hdec, dec;
+ u64 purr, spurr;
u64 *exsave;
- bool ri_set;
int trap;
unsigned long msr;
unsigned long host_hfscr;
@@ -199,11 +775,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
unsigned long host_dawr0;
unsigned long host_dawrx0;
unsigned long host_psscr;
+ unsigned long host_hpsscr;
unsigned long host_pidr;
unsigned long host_dawr1;
unsigned long host_dawrx1;
+ unsigned long dpdes;
- hdec = time_limit - mftb();
+ hdec = time_limit - *tb;
if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
@@ -214,51 +792,84 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
vcpu->arch.ceded = 0;
- if (vc->tb_offset) {
- u64 new_tb = mftb() + vc->tb_offset;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = vc->tb_offset;
- }
-
- msr = mfmsr();
+ /* Save MSR for restore, with EE clear. */
+ msr = mfmsr() & ~MSR_EE;
host_hfscr = mfspr(SPRN_HFSCR);
host_ciabr = mfspr(SPRN_CIABR);
- host_dawr0 = mfspr(SPRN_DAWR0);
- host_dawrx0 = mfspr(SPRN_DAWRX0);
- host_psscr = mfspr(SPRN_PSSCR);
+ host_psscr = mfspr(SPRN_PSSCR_PR);
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ host_hpsscr = mfspr(SPRN_PSSCR);
host_pidr = mfspr(SPRN_PID);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- host_dawr1 = mfspr(SPRN_DAWR1);
- host_dawrx1 = mfspr(SPRN_DAWRX1);
- }
- if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
- mtspr(SPRN_DPDES, vc->dpdes);
- mtspr(SPRN_VTB, vc->vtb);
+ if (dawr_enabled()) {
+ host_dawr0 = mfspr(SPRN_DAWR0);
+ host_dawrx0 = mfspr(SPRN_DAWRX0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }
+ }
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+
+ save_p9_host_os_sprs(&host_os_sprs);
+
+ msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+ if (lazy_irq_pending()) {
+ trap = 0;
+ goto out;
+ }
+
+ if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+ msr = mfmsr(); /* MSR may have been updated */
+
+ if (vc->tb_offset) {
+ u64 new_tb = *tb + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ *tb = new_tb;
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ mtspr(SPRN_VTB, vc->vtb);
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
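+ /* Replay a requested guest doorbell by setting DPDES before entry */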
+ if (vcpu->arch.doorbell_request) {
+ vcpu->arch.doorbell_request = 0;
+ mtspr(SPRN_DPDES, 1);
+ }
+
if (dawr_enabled()) {
- mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
- mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+ if (vcpu->arch.dawr0 != host_dawr0)
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+ if (vcpu->arch.dawrx0 != host_dawrx0)
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
- mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+ if (vcpu->arch.dawr1 != host_dawr1)
+ mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+ if (vcpu->arch.dawrx1 != host_dawrx1)
+ mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
}
}
- mtspr(SPRN_CIABR, vcpu->arch.ciabr);
- mtspr(SPRN_IC, vcpu->arch.ic);
+ if (vcpu->arch.ciabr != host_ciabr)
+ mtspr(SPRN_CIABR, vcpu->arch.ciabr);
- mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ } else {
+ if (vcpu->arch.psscr != host_psscr)
+ mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+ }
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
@@ -276,18 +887,34 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
* HDSI which should correctly update the HDSISR the second time HDSI
* entry.
*
- * Just do this on all p9 processors for now.
+ * The "radix prefetch bug" feature test can be used to check for this
+ * bug, as it also exists on DD2.1 and below.
*/
- mtspr(SPRN_HDSISR, HDSISR_CANARY);
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ mtspr(SPRN_HDSISR, HDSISR_CANARY);
mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
- mtspr(SPRN_AMOR, ~0UL);
+ /*
+ * It might be preferable to call load_vcpu_state here, in order to get the
+ * GPR/FP register loads executing in parallel with the previous mtSPR
+ * instructions, but for now that can't be done because the TM handling
+ * in load_vcpu_state can change some SPRs and vcpu state (nip, msr).
+ * But TM could be split out if this would be a significant benefit.
+ */
- local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
+ /*
+ * MSR[RI] does not need to be cleared (and is not, for radix guests
+ * with no prefetch bug), because in_guest is set. If we take a SRESET
+ * or MCE with in_guest set but still in HV mode, then
+ * kvmppc_p9_bad_interrupt handles the interrupt, which effectively
+ * clears MSR[RI] and doesn't return.
+ */
+ WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9);
+ barrier(); /* Open in_guest critical section */
/*
* Hash host, hash guest, or radix guest with prefetch bug, all have
@@ -299,17 +926,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
save_clear_host_mmu(kvm);
- if (kvm_is_radix(kvm)) {
+ if (kvm_is_radix(kvm))
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- __mtmsrd(0, 1); /* clear RI */
-
- } else {
+ else
switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
- }
/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
- kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+ check_need_tlb_flush(kvm, vc->pcpu, nested);
/*
* P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
@@ -317,6 +940,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
+ mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb);
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_return_to_guest:
#endif
@@ -327,7 +952,9 @@ tm_return_to_guest:
accumulate_time(vcpu, &vcpu->arch.guest_time);
+ switch_pmu_to_guest(vcpu, &host_os_sprs);
kvmppc_p9_enter_guest(vcpu);
+ switch_pmu_to_host(vcpu, &host_os_sprs);
accumulate_time(vcpu, &vcpu->arch.rm_intr);
@@ -340,36 +967,27 @@ tm_return_to_guest:
/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
trap = local_paca->kvm_hstate.scratch0 & ~0x2;
- /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
- ri_set = false;
- if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
- if (trap != BOOK3S_INTERRUPT_SYSCALL &&
- (vcpu->arch.shregs.msr & MSR_RI))
- ri_set = true;
+ if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK))
exsave = local_paca->exgen;
- } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
+ else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET)
exsave = local_paca->exnmi;
- } else { /* trap == 0x200 */
+ else /* trap == 0x200 */
exsave = local_paca->exmc;
- }
vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
/*
- * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
- * and hstate scratch (which we need to move into exsave to make
- * re-entrant vs SRESET/MCE)
+ * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate
+ * scratch (which we need to move into exsave to make re-entrant vs
+ * SRESET/MCE), register state is protected from reentrancy. However,
+ * the timebase, MMU, and other state are still set up for the guest,
+ * so don't enable MSR[RI] here. It gets enabled at the end, after in_guest
+ * is cleared.
+ *
+ * It is possible an NMI could come in here, which is why it is
+ * important to save the above state early so it can be debugged.
*/
- if (ri_set) {
- if (unlikely(!(mfmsr() & MSR_RI))) {
- __mtmsrd(MSR_RI, 1);
- WARN_ON_ONCE(1);
- }
- } else {
- WARN_ON_ONCE(mfmsr() & MSR_RI);
- __mtmsrd(MSR_RI, 1);
- }
vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
@@ -388,7 +1006,7 @@ tm_return_to_guest:
kvmppc_realmode_machine_check(vcpu);
} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
- kvmppc_realmode_hmi_handler();
+ kvmppc_p9_realmode_hmi_handler(vcpu);
} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
@@ -427,13 +1045,6 @@ tm_return_to_guest:
*/
mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
-
- /*
- * tm_return_to_guest re-loads SRR0/1, DAR,
- * DSISR after RI is cleared, in case they had
- * been clobbered by a MCE.
- */
- __mtmsrd(0, 1); /* clear RI */
goto tm_return_to_guest;
}
}
@@ -445,81 +1056,109 @@ tm_return_to_guest:
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
- purr - vcpu->arch.purr);
- mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
- spurr - vcpu->arch.spurr);
+ local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
+ local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
vcpu->arch.purr = purr;
vcpu->arch.spurr = spurr;
vcpu->arch.ic = mfspr(SPRN_IC);
vcpu->arch.pid = mfspr(SPRN_PID);
- vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
- /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
- mtspr(SPRN_PSSCR, host_psscr |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, host_hfscr);
- mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR0, host_dawr0);
- mtspr(SPRN_DAWRX0, host_dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, host_dawr1);
- mtspr(SPRN_DAWRX1, host_dawrx1);
- }
-
- if (kvm_is_radix(kvm)) {
- /*
- * Since this is radix, do a eieio; tlbsync; ptesync sequence
- * in case we interrupted the guest between a tlbie and a
- * ptesync.
- */
- asm volatile("eieio; tlbsync; ptesync");
- }
+ dpdes = mfspr(SPRN_DPDES);
+ if (dpdes)
+ vcpu->arch.doorbell_request = 1;
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- asm volatile(PPC_CP_ABORT);
-
- vc->dpdes = mfspr(SPRN_DPDES);
vc->vtb = mfspr(SPRN_VTB);
- mtspr(SPRN_DPDES, 0);
- if (vc->pcr)
- mtspr(SPRN_PCR, PCR_MASK);
+
+ dec = mfspr(SPRN_DEC);
+ if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+ dec = (s32) dec;
+ *tb = mftb();
+ vcpu->arch.dec_expires = dec + *tb;
if (vc->tb_offset_applied) {
- u64 new_tb = mftb() - vc->tb_offset_applied;
+ u64 new_tb = *tb - vc->tb_offset_applied;
mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ *tb = new_tb;
vc->tb_offset_applied = 0;
}
- mtspr(SPRN_HDEC, 0x7fffffff);
-
save_clear_guest_mmu(kvm, vcpu);
switch_mmu_to_host(kvm, host_pidr);
- local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
/*
- * If we are in real mode, only switch MMU on after the MMU is
- * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
+ * Enable MSR here in order to have facilities enabled to save
+ * guest registers. This enables the MMU (if we were in real mode), so
+ * it must only be done after the MMU has been switched back to the
+ * host, to avoid the P9_RADIX_PREFETCH_BUG or the hash guest context.
*/
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
- vcpu->arch.shregs.msr & MSR_TS_MASK)
+ vcpu->arch.shregs.msr & MSR_TS_MASK)
msr |= MSR_TS_S;
-
__mtmsrd(msr, 0);
+ store_vcpu_state(vcpu);
+
+ mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
+ mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
+
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_hpsscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ }
+
+ mtspr(SPRN_HFSCR, host_hfscr);
+ if (vcpu->arch.ciabr != host_ciabr)
+ mtspr(SPRN_CIABR, host_ciabr);
+
+ if (dawr_enabled()) {
+ if (vcpu->arch.dawr0 != host_dawr0)
+ mtspr(SPRN_DAWR0, host_dawr0);
+ if (vcpu->arch.dawrx0 != host_dawrx0)
+ mtspr(SPRN_DAWRX0, host_dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ if (vcpu->arch.dawr1 != host_dawr1)
+ mtspr(SPRN_DAWR1, host_dawr1);
+ if (vcpu->arch.dawrx1 != host_dawrx1)
+ mtspr(SPRN_DAWRX1, host_dawrx1);
+ }
+ }
+
+ if (dpdes)
+ mtspr(SPRN_DPDES, 0);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, PCR_MASK);
+
+ /* HDEC must be at least as large as DEC, so decrementer_max fits */
+ mtspr(SPRN_HDEC, decrementer_max);
+
+ timer_rearm_host_dec(*tb);
+
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+
+ barrier(); /* Close in_guest critical section */
+ WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE);
+ /* Interrupts are recoverable at this point */
+
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
+out:
end_timing(vcpu);
return trap;
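
The decrementer save in the exit path above sign-extends DEC when the large
decrementer (LPCR[LD]) is not in use, before adding it to the timebase to form
the guest expiry. A minimal standalone sketch of that conversion, assuming a
hypothetical lpcr_ld flag and made-up values:

/*
 * Illustrative sketch only: without the large decrementer, DEC is a
 * 32-bit signed quantity, so it must be sign-extended before being
 * added to the timebase; otherwise an already-expired (negative) DEC
 * would push the expiry billions of ticks into the future.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t dec_to_expiry(uint64_t dec_raw, uint64_t tb, int lpcr_ld)
{
        int64_t dec = lpcr_ld ? (int64_t)dec_raw : (int64_t)(int32_t)dec_raw;

        return tb + dec;        /* may be in the past if DEC already went negative */
}

int main(void)
{
        /* hypothetical values: DEC underflowed to -5 on a 32-bit decrementer */
        printf("%llu\n", (unsigned long long)dec_to_expiry(0xfffffffbULL, 1000000, 0));
        return 0;
}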
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d4bca93b79f6..ccfd96965630 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -136,6 +136,60 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
vcpu->arch.mce_evt = mce_evt;
}
+
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ long ret = 0;
+
+ /*
+ * Unapply and clear the offset first. That way, if the TB was not
+ * resynced then it will remain in host-offset, and if it was resynced
+ * then it is brought into host-offset. Then the tb offset is
+ * re-applied before continuing with the KVM exit.
+ *
+ * This way, we don't need to actually know whether or not OPAL resynced
+ * the timebase or do any of the complicated dance that the P7/8
+ * path requires.
+ */
+ if (vc->tb_offset_applied) {
+ u64 new_tb = mftb() - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ vc->tb_offset_applied = 0;
+ }
+
+ local_paca->hmi_irqs++;
+
+ if (hmi_handle_debugtrig(NULL) >= 0) {
+ ret = 1;
+ goto out;
+ }
+
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(NULL);
+
+out:
+ if (vc->tb_offset) {
+ u64 new_tb = mftb() + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ return ret;
+}
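
The TBU40 update pattern used in kvmppc_p9_realmode_hmi_handler() above (and in
the P9 entry/exit path) only writes the upper 40 bits of the timebase, so a
carry fix-up is needed when the low 24 bits wrap between computing the target
and writing it. A minimal standalone sketch of that pattern; mftb() and
mtspr_tbu40() here are hypothetical stand-ins for the real accessors:

/*
 * Illustrative sketch only: mtspr_tbu40() writes only the upper 40 bits;
 * the lower 24 bits keep ticking. If they have already wrapped past the
 * low bits of the target, the target's upper bits must be bumped by one
 * low-field unit (0x1000000) so the timebase does not appear to go
 * backwards.
 */
#include <stdint.h>

static uint64_t fake_tb;                        /* stand-in hardware timebase */

static uint64_t mftb(void)
{
        return fake_tb;
}

static void mtspr_tbu40(uint64_t v)
{
        /* only the upper 40 bits are written; the low 24 bits keep their value */
        fake_tb = (v & ~0xffffffULL) | (fake_tb & 0xffffffULL);
}

static uint64_t set_tb_with_offset(uint64_t offset)
{
        uint64_t new_tb = mftb() + offset;

        mtspr_tbu40(new_tb);
        if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
                new_tb += 0x1000000;    /* carry into the upper 40 bits */
                mtspr_tbu40(new_tb);
        }
        return new_tb;
}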
+
+/*
+ * The following subcore HMI handling applies only to pre-POWER9 CPUs.
+ */
+
/* Check if dynamic split is in force and return subcore size accordingly. */
static inline int kvmppc_cur_subcore_size(void)
{
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 2c1f3c6e72d1..2257fb18cb72 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -55,12 +55,6 @@ static int global_invalidates(struct kvm *kvm)
smp_wmb();
cpumask_setall(&kvm->arch.need_tlb_flush);
cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
- /*
- * On POWER9, threads are independent but the TLB is shared,
- * so use the bit for the first thread to represent the core.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- cpu = cpu_first_tlb_thread_sibling(cpu);
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 32a4b4d412b9..d185dee26026 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -778,17 +778,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld r5,VCPU_AMR(r4)
ld r6,VCPU_UAMOR(r4)
- li r7,-1
mtspr SPRN_AMR,r5
mtspr SPRN_UAMOR,r6
- mtspr SPRN_AMOR,r7
- /* Restore state of CTRL run bit; assume 1 on entry */
+ /* Restore state of CTRL run bit; the host currently has it set to 1 */
lwz r5,VCPU_CTRL(r4)
andi. r5,r5,1
bne 4f
- mfspr r6,SPRN_CTRLF
- clrrdi r6,r6,1
+ li r6,0
mtspr SPRN_CTRLT,r6
4:
/* Secondary threads wait for primary to have done partition switch */
@@ -817,10 +814,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
* Set the decrementer to the guest decrementer.
*/
ld r8,VCPU_DEC_EXPIRES(r4)
- /* r8 is a host timebase value here, convert to guest TB */
- ld r5,HSTATE_KVM_VCORE(r13)
- ld r6,VCORE_TB_OFFSET_APPL(r5)
- add r8,r8,r6
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
@@ -1195,9 +1188,6 @@ guest_bypass:
mftb r6
extsw r5,r5
16: add r5,r5,r6
- /* r5 is a guest timebase value here, convert to host TB */
- ld r4,VCORE_TB_OFFSET_APPL(r3)
- subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
/* Increment exit count, poke other threads to exit */
@@ -1211,12 +1201,12 @@ guest_bypass:
stw r0, VCPU_CPU(r9)
stw r0, VCPU_THREAD_CPU(r9)
- /* Save guest CTRL register, set runlatch to 1 */
+ /* Save guest CTRL register, set runlatch to 1 if it was clear */
mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
andi. r0,r6,1
bne 4f
- ori r6,r6,1
+ li r6,1
mtspr SPRN_CTRLT,r6
4:
/*
@@ -2163,9 +2153,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/* save expiry time of guest decrementer */
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
- ld r5, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_TB_OFFSET_APPL(r5)
- subf r3, r6, r3 /* convert to host TB value */
std r3, VCPU_DEC_EXPIRES(r4)
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
@@ -2186,8 +2173,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
* Also clear the runlatch bit before napping.
*/
kvm_do_nap:
- mfspr r0, SPRN_CTRLF
- clrrdi r0, r0, 1
+ li r0,0
mtspr SPRN_CTRLT, r0
li r0,1
@@ -2206,8 +2192,7 @@ kvm_nap_sequence: /* desired LPCR value in r5 */
bl isa206_idle_insn_mayloss
- mfspr r0, SPRN_CTRLF
- ori r0, r0, 1
+ li r0,1
mtspr SPRN_CTRLT, r0
mtspr SPRN_SRR1, r3
@@ -2264,9 +2249,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/* Restore guest decrementer */
ld r3, VCPU_DEC_EXPIRES(r4)
- ld r5, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_TB_OFFSET_APPL(r5)
- add r3, r3, r6 /* convert host TB to guest TB value */
mftb r7
subf r3, r7, r3
mtspr SPRN_DEC, r3
@@ -2711,8 +2693,7 @@ kvmppc_bad_host_intr:
std r0, GPR0(r1)
std r9, GPR1(r1)
std r2, GPR2(r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
srdi r0, r12, 32
clrldi r12, r12, 32
std r0, _CCR(r1)
@@ -2735,7 +2716,7 @@ kvmppc_bad_host_intr:
ld r9, HSTATE_SCRATCH2(r13)
ld r12, HSTATE_SCRATCH0(r13)
GET_SCRATCH0(r0)
- SAVE_4GPRS(9, r1)
+ SAVE_GPRS(9, 12, r1)
std r0, GPR13(r1)
SAVE_NVGPRS(r1)
ld r5, HSTATE_CFAR(r13)
@@ -2778,10 +2759,11 @@ kvmppc_msr_interrupt:
blr
/*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
* Load up guest PMU state. R3 points to the vcpu struct.
*/
-_GLOBAL(kvmhv_load_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+kvmhv_load_guest_pmu:
mr r4, r3
mflr r0
li r3, 1
@@ -2816,26 +2798,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
mtspr SPRN_SIAR, r7
mtspr SPRN_SDAR, r8
BEGIN_FTR_SECTION
- ld r5, VCPU_MMCR + 24(r4)
- ld r6, VCPU_SIER + 8(r4)
- ld r7, VCPU_SIER + 16(r4)
- mtspr SPRN_MMCR3, r5
- mtspr SPRN_SIER2, r6
- mtspr SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
-BEGIN_FTR_SECTION
ld r5, VCPU_MMCR + 16(r4)
ld r6, VCPU_SIER(r4)
mtspr SPRN_MMCR2, r5
mtspr SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
lwz r7, VCPU_PMC + 24(r4)
lwz r8, VCPU_PMC + 28(r4)
ld r9, VCPU_MMCRS(r4)
mtspr SPRN_SPMC1, r7
mtspr SPRN_SPMC2, r8
mtspr SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_MMCR0, r3
isync
@@ -2843,10 +2815,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
blr
/*
+ * void kvmhv_load_host_pmu(void)
+ *
* Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
*/
-_GLOBAL(kvmhv_load_host_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+kvmhv_load_host_pmu:
mflr r0
lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r4, 0
@@ -2884,25 +2857,18 @@ BEGIN_FTR_SECTION
mtspr SPRN_MMCR2, r8
mtspr SPRN_SIER, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- ld r5, HSTATE_MMCR3(r13)
- ld r6, HSTATE_SIER2(r13)
- ld r7, HSTATE_SIER3(r13)
- mtspr SPRN_MMCR3, r5
- mtspr SPRN_SIER2, r6
- mtspr SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mtspr SPRN_MMCR0, r3
isync
mtlr r0
23: blr
/*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
* Save guest PMU state into the vcpu struct.
* r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
*/
-_GLOBAL(kvmhv_save_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+kvmhv_save_guest_pmu:
mr r9, r3
mr r8, r4
BEGIN_FTR_SECTION
@@ -2951,14 +2917,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
std r10, VCPU_MMCR + 16(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_MMCR3
- mfspr r6, SPRN_SIER2
- mfspr r7, SPRN_SIER3
- std r5, VCPU_MMCR + 24(r9)
- std r6, VCPU_SIER + 8(r9)
- std r7, VCPU_SIER + 16(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
std r7, VCPU_SIAR(r9)
std r8, VCPU_SDAR(r9)
mfspr r3, SPRN_PMC1
@@ -2976,7 +2934,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
BEGIN_FTR_SECTION
mfspr r5, SPRN_SIER
std r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
mfspr r6, SPRN_SPMC1
mfspr r7, SPRN_SPMC2
mfspr r8, SPRN_MMCRS
@@ -2985,7 +2942,6 @@ BEGIN_FTR_SECTION_NESTED(96)
std r8, VCPU_MMCRS(r9)
lis r4, 0x8000
mtspr SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22: blr
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 9e5d0f413b71..5d1881d2e39a 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -19,7 +19,12 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o
+CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
+obj-y += alloc.o code-patching.o feature-fixups.o pmem.o
+
+obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o
ifndef CONFIG_KASAN
obj-y += string.o memcmp_$(BITS).o
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c5ed98823835..906d43463366 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -3,22 +3,18 @@
* Copyright 2008 Michael Ellerman, IBM Corporation.
*/
-#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
-#include <linux/mm.h>
#include <linux/cpuhotplug.h>
-#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/code-patching.h>
-#include <asm/setup.h>
#include <asm/inst.h>
-static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr)
+static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
{
if (!ppc_inst_prefixed(instr)) {
u32 val = ppc_inst_val(instr);
@@ -39,7 +35,7 @@ failed:
return -EFAULT;
}
-int raw_patch_instruction(u32 *addr, struct ppc_inst instr)
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
return __patch_instruction(addr, instr, addr);
}
@@ -86,23 +82,16 @@ void __init poking_init(void)
static int map_patch_area(void *addr, unsigned long text_poke_addr)
{
unsigned long pfn;
- int err;
if (is_vmalloc_or_module_addr(addr))
pfn = vmalloc_to_pfn(addr);
else
pfn = __pa_symbol(addr) >> PAGE_SHIFT;
- err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
-
- pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
- if (err)
- return -1;
-
- return 0;
+ return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
}
-static inline int unmap_patch_area(unsigned long addr)
+static void unmap_patch_area(unsigned long addr)
{
pte_t *ptep;
pmd_t *pmdp;
@@ -111,43 +100,56 @@ static inline int unmap_patch_area(unsigned long addr)
pgd_t *pgdp;
pgdp = pgd_offset_k(addr);
- if (unlikely(!pgdp))
- return -EINVAL;
+ if (WARN_ON(pgd_none(*pgdp)))
+ return;
p4dp = p4d_offset(pgdp, addr);
- if (unlikely(!p4dp))
- return -EINVAL;
+ if (WARN_ON(p4d_none(*p4dp)))
+ return;
pudp = pud_offset(p4dp, addr);
- if (unlikely(!pudp))
- return -EINVAL;
+ if (WARN_ON(pud_none(*pudp)))
+ return;
pmdp = pmd_offset(pudp, addr);
- if (unlikely(!pmdp))
- return -EINVAL;
+ if (WARN_ON(pmd_none(*pmdp)))
+ return;
ptep = pte_offset_kernel(pmdp, addr);
- if (unlikely(!ptep))
- return -EINVAL;
-
- pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr);
+ if (WARN_ON(pte_none(*ptep)))
+ return;
/*
* In hash, pte_clear flushes the TLB; in radix, we have to flush it explicitly.
*/
pte_clear(&init_mm, addr, ptep);
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+}
- return 0;
+static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+
+ text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ err = map_patch_area(addr, text_poke_addr);
+ if (err)
+ return err;
+
+ err = __patch_instruction(addr, instr, patch_addr);
+
+ unmap_patch_area(text_poke_addr);
+
+ return err;
}
-static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
int err;
- u32 *patch_addr = NULL;
unsigned long flags;
- unsigned long text_poke_addr;
- unsigned long kaddr = (unsigned long)addr;
/*
* During early early boot patch_instruction is called
@@ -158,51 +160,37 @@ static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
return raw_patch_instruction(addr, instr);
local_irq_save(flags);
-
- text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
- if (map_patch_area(addr, text_poke_addr)) {
- err = -1;
- goto out;
- }
-
- patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK));
-
- __patch_instruction(addr, instr, patch_addr);
-
- err = unmap_patch_area(text_poke_addr);
- if (err)
- pr_warn("failed to unmap %lx\n", text_poke_addr);
-
-out:
+ err = __do_patch_instruction(addr, instr);
local_irq_restore(flags);
return err;
}
#else /* !CONFIG_STRICT_KERNEL_RWX */
-static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
return raw_patch_instruction(addr, instr);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
-int patch_instruction(u32 *addr, struct ppc_inst instr)
+int patch_instruction(u32 *addr, ppc_inst_t instr)
{
/* Make sure we aren't patching a freed init section */
- if (init_mem_is_free && init_section_contains(addr, 4)) {
- pr_debug("Skipping init section patching addr: 0x%px\n", addr);
+ if (system_state >= SYSTEM_FREEING_INITMEM && init_section_contains(addr, 4))
return 0;
- }
+
return do_patch_instruction(addr, instr);
}
NOKPROBE_SYMBOL(patch_instruction);
int patch_branch(u32 *addr, unsigned long target, int flags)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
+
+ if (create_branch(&instr, addr, target, flags))
+ return -ERANGE;
- create_branch(&instr, addr, target, flags);
return patch_instruction(addr, instr);
}
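
With the hunk above, create_branch() failures propagate out of patch_branch()
as -ERANGE rather than silently patching whatever create_branch() left behind.
A hedged sketch of a caller checking that return value; install_handler_branch
and its arguments are hypothetical names for illustration, not kernel API:

/*
 * Illustrative sketch only: error handling for patch_branch() after the
 * change above. patch_branch() and BRANCH_SET_LINK come from
 * <asm/code-patching.h>; the wrapper itself is hypothetical.
 */
#include <linux/types.h>
#include <linux/printk.h>
#include <asm/code-patching.h>

static int install_handler_branch(u32 *site, unsigned long handler)
{
        int err = patch_branch(site, handler, BRANCH_SET_LINK);

        if (err)        /* -ERANGE if the target is out of relative-branch range */
                pr_warn("code-patching: cannot branch %px -> %lx (%d)\n",
                        site, handler, err);

        return err;
}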
@@ -237,7 +225,7 @@ bool is_offset_in_cond_branch_range(long offset)
* Helper to check if a given instruction is a conditional branch
* Derived from the conditional checks in analyse_instr()
*/
-bool is_conditional_branch(struct ppc_inst instr)
+bool is_conditional_branch(ppc_inst_t instr)
{
unsigned int opcode = ppc_inst_primary_opcode(instr);
@@ -255,7 +243,7 @@ bool is_conditional_branch(struct ppc_inst instr)
}
NOKPROBE_SYMBOL(is_conditional_branch);
-int create_branch(struct ppc_inst *instr, const u32 *addr,
+int create_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags)
{
long offset;
@@ -275,7 +263,7 @@ int create_branch(struct ppc_inst *instr, const u32 *addr,
return 0;
}
-int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags)
{
long offset;
@@ -294,22 +282,7 @@ int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
return 0;
}
-static unsigned int branch_opcode(struct ppc_inst instr)
-{
- return ppc_inst_primary_opcode(instr) & 0x3F;
-}
-
-static int instr_is_branch_iform(struct ppc_inst instr)
-{
- return branch_opcode(instr) == 18;
-}
-
-static int instr_is_branch_bform(struct ppc_inst instr)
-{
- return branch_opcode(instr) == 16;
-}
-
-int instr_is_relative_branch(struct ppc_inst instr)
+int instr_is_relative_branch(ppc_inst_t instr)
{
if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
return 0;
@@ -317,7 +290,7 @@ int instr_is_relative_branch(struct ppc_inst instr)
return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}
-int instr_is_relative_link_branch(struct ppc_inst instr)
+int instr_is_relative_link_branch(ppc_inst_t instr)
{
return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
}
@@ -364,7 +337,7 @@ unsigned long branch_target(const u32 *instr)
return 0;
}
-int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src)
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
{
unsigned long target;
target = branch_target(src);
@@ -378,375 +351,3 @@ int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src)
return 1;
}
-
-#ifdef CONFIG_PPC_BOOK3E_64
-void __patch_exception(int exc, unsigned long addr)
-{
- extern unsigned int interrupt_base_book3e;
- unsigned int *ibase = &interrupt_base_book3e;
-
- /* Our exceptions vectors start with a NOP and -then- a branch
- * to deal with single stepping from userspace which stops on
- * the second instruction. Thus we need to patch the second
- * instruction of the exception, not the first one
- */
-
- patch_branch(ibase + (exc / 4) + 1, addr, 0);
-}
-#endif
-
-#ifdef CONFIG_CODE_PATCHING_SELFTEST
-
-static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
-{
- if (instr_is_branch_iform(ppc_inst_read(instr)) ||
- instr_is_branch_bform(ppc_inst_read(instr)))
- return branch_target(instr) == addr;
-
- return 0;
-}
-
-static void __init test_trampoline(void)
-{
- asm ("nop;\n");
-}
-
-#define check(x) \
- if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__);
-
-static void __init test_branch_iform(void)
-{
- int err;
- struct ppc_inst instr;
- u32 tmp[2];
- u32 *iptr = tmp;
- unsigned long addr = (unsigned long)tmp;
-
- /* The simplest case, branch to self, no flags */
- check(instr_is_branch_iform(ppc_inst(0x48000000)));
- /* All bits of target set, and flags */
- check(instr_is_branch_iform(ppc_inst(0x4bffffff)));
- /* High bit of opcode set, which is wrong */
- check(!instr_is_branch_iform(ppc_inst(0xcbffffff)));
- /* Middle bits of opcode set, which is wrong */
- check(!instr_is_branch_iform(ppc_inst(0x7bffffff)));
-
- /* Simplest case, branch to self with link */
- check(instr_is_branch_iform(ppc_inst(0x48000001)));
- /* All bits of targets set */
- check(instr_is_branch_iform(ppc_inst(0x4bfffffd)));
- /* Some bits of targets set */
- check(instr_is_branch_iform(ppc_inst(0x4bff00fd)));
- /* Must be a valid branch to start with */
- check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
-
- /* Absolute branch to 0x100 */
- patch_instruction(iptr, ppc_inst(0x48000103));
- check(instr_is_branch_to_addr(iptr, 0x100));
- /* Absolute branch to 0x420fc */
- patch_instruction(iptr, ppc_inst(0x480420ff));
- check(instr_is_branch_to_addr(iptr, 0x420fc));
- /* Maximum positive relative branch, + 20MB - 4B */
- patch_instruction(iptr, ppc_inst(0x49fffffc));
- check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
- /* Smallest negative relative branch, - 4B */
- patch_instruction(iptr, ppc_inst(0x4bfffffc));
- check(instr_is_branch_to_addr(iptr, addr - 4));
- /* Largest negative relative branch, - 32 MB */
- patch_instruction(iptr, ppc_inst(0x4a000000));
- check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
-
- /* Branch to self, with link */
- err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr));
-
- /* Branch to self - 0x100, with link */
- err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr - 0x100));
-
- /* Branch to self + 0x100, no link */
- err = create_branch(&instr, iptr, addr + 0x100, 0);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr + 0x100));
-
- /* Maximum relative negative offset, - 32 MB */
- err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
-
- /* Out of range relative negative offset, - 32 MB + 4*/
- err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
- check(err);
-
- /* Out of range relative positive offset, + 32 MB */
- err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
- check(err);
-
- /* Unaligned target */
- err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
- check(err);
-
- /* Check flags are masked correctly */
- err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr));
- check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
-}
-
-static void __init test_create_function_call(void)
-{
- u32 *iptr;
- unsigned long dest;
- struct ppc_inst instr;
-
- /* Check we can create a function call */
- iptr = (u32 *)ppc_function_entry(test_trampoline);
- dest = ppc_function_entry(test_create_function_call);
- create_branch(&instr, iptr, dest, BRANCH_SET_LINK);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, dest));
-}
-
-static void __init test_branch_bform(void)
-{
- int err;
- unsigned long addr;
- struct ppc_inst instr;
- u32 tmp[2];
- u32 *iptr = tmp;
- unsigned int flags;
-
- addr = (unsigned long)iptr;
-
- /* The simplest case, branch to self, no flags */
- check(instr_is_branch_bform(ppc_inst(0x40000000)));
- /* All bits of target set, and flags */
- check(instr_is_branch_bform(ppc_inst(0x43ffffff)));
- /* High bit of opcode set, which is wrong */
- check(!instr_is_branch_bform(ppc_inst(0xc3ffffff)));
- /* Middle bits of opcode set, which is wrong */
- check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
-
- /* Absolute conditional branch to 0x100 */
- patch_instruction(iptr, ppc_inst(0x43ff0103));
- check(instr_is_branch_to_addr(iptr, 0x100));
- /* Absolute conditional branch to 0x20fc */
- patch_instruction(iptr, ppc_inst(0x43ff20ff));
- check(instr_is_branch_to_addr(iptr, 0x20fc));
- /* Maximum positive relative conditional branch, + 32 KB - 4B */
- patch_instruction(iptr, ppc_inst(0x43ff7ffc));
- check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
- /* Smallest negative relative conditional branch, - 4B */
- patch_instruction(iptr, ppc_inst(0x43fffffc));
- check(instr_is_branch_to_addr(iptr, addr - 4));
- /* Largest negative relative conditional branch, - 32 KB */
- patch_instruction(iptr, ppc_inst(0x43ff8000));
- check(instr_is_branch_to_addr(iptr, addr - 0x8000));
-
- /* All condition code bits set & link */
- flags = 0x3ff000 | BRANCH_SET_LINK;
-
- /* Branch to self */
- err = create_cond_branch(&instr, iptr, addr, flags);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr));
-
- /* Branch to self - 0x100 */
- err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr - 0x100));
-
- /* Branch to self + 0x100 */
- err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr + 0x100));
-
- /* Maximum relative negative offset, - 32 KB */
- err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr - 0x8000));
-
- /* Out of range relative negative offset, - 32 KB + 4*/
- err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
- check(err);
-
- /* Out of range relative positive offset, + 32 KB */
- err = create_cond_branch(&instr, iptr, addr + 0x8000, flags);
- check(err);
-
- /* Unaligned target */
- err = create_cond_branch(&instr, iptr, addr + 3, flags);
- check(err);
-
- /* Check flags are masked correctly */
- err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
- patch_instruction(iptr, instr);
- check(instr_is_branch_to_addr(iptr, addr));
- check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
-}
-
-static void __init test_translate_branch(void)
-{
- unsigned long addr;
- void *p, *q;
- struct ppc_inst instr;
- void *buf;
-
- buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
- check(buf);
- if (!buf)
- return;
-
- /* Simple case, branch to self moved a little */
- p = buf;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- check(instr_is_branch_to_addr(p, addr));
- q = p + 4;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(q, addr));
-
- /* Maximum negative case, move b . to addr + 32 MB */
- p = buf;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- q = buf + 0x2000000;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000)));
-
- /* Maximum positive case, move x to x - 32 MB + 4 */
- p = buf + 0x2000000;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- q = buf + 4;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc)));
-
- /* Jump to x + 16 MB moved to x + 20 MB */
- p = buf;
- addr = 0x1000000 + (unsigned long)buf;
- patch_branch(p, addr, BRANCH_SET_LINK);
- q = buf + 0x1400000;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Jump to x + 16 MB moved to x - 16 MB + 4 */
- p = buf + 0x1000000;
- addr = 0x2000000 + (unsigned long)buf;
- patch_branch(p, addr, 0);
- q = buf + 4;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
-
- /* Conditional branch tests */
-
- /* Simple case, branch to self moved a little */
- p = buf;
- addr = (unsigned long)p;
- create_cond_branch(&instr, p, addr, 0);
- patch_instruction(p, instr);
- check(instr_is_branch_to_addr(p, addr));
- q = buf + 4;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(q, addr));
-
- /* Maximum negative case, move b . to addr + 32 KB */
- p = buf;
- addr = (unsigned long)p;
- create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
- patch_instruction(p, instr);
- q = buf + 0x8000;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000)));
-
- /* Maximum positive case, move x to x - 32 KB + 4 */
- p = buf + 0x8000;
- addr = (unsigned long)p;
- create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
- patch_instruction(p, instr);
- q = buf + 4;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc)));
-
- /* Jump to x + 12 KB moved to x + 20 KB */
- p = buf;
- addr = 0x3000 + (unsigned long)buf;
- create_cond_branch(&instr, p, addr, BRANCH_SET_LINK);
- patch_instruction(p, instr);
- q = buf + 0x5000;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Jump to x + 8 KB moved to x - 8 KB + 4 */
- p = buf + 0x2000;
- addr = 0x4000 + (unsigned long)buf;
- create_cond_branch(&instr, p, addr, 0);
- patch_instruction(p, instr);
- q = buf + 4;
- translate_branch(&instr, q, p);
- patch_instruction(q, instr);
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Free the buffer we were using */
- vfree(buf);
-}
-
-#ifdef CONFIG_PPC64
-static void __init test_prefixed_patching(void)
-{
- extern unsigned int code_patching_test1[];
- extern unsigned int code_patching_test1_expected[];
- extern unsigned int end_code_patching_test1[];
-
- __patch_instruction(code_patching_test1,
- ppc_inst_prefix(OP_PREFIX << 26, 0x00000000),
- code_patching_test1);
-
- check(!memcmp(code_patching_test1,
- code_patching_test1_expected,
- sizeof(unsigned int) *
- (end_code_patching_test1 - code_patching_test1)));
-}
-#else
-static inline void test_prefixed_patching(void) {}
-#endif
-
-static int __init test_code_patching(void)
-{
- printk(KERN_DEBUG "Running code patching self-tests ...\n");
-
- test_branch_iform();
- test_branch_bform();
- test_create_function_call();
- test_translate_branch();
- test_prefixed_patching();
-
- return 0;
-}
-late_initcall(test_code_patching);
-
-#endif /* CONFIG_CODE_PATCHING_SELFTEST */
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index c3e06922468b..343a78826035 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -47,7 +47,7 @@ static u32 *calc_addr(struct fixup_entry *fcur, long offset)
static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
{
int err;
- struct ppc_inst instr;
+ ppc_inst_t instr;
instr = ppc_inst_read(src);
@@ -580,7 +580,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
-static void patch_btb_flush_section(long *curr)
+static void __init patch_btb_flush_section(long *curr)
{
unsigned int *start, *end;
@@ -592,7 +592,7 @@ static void patch_btb_flush_section(long *curr)
}
}
-void do_btb_flush_fixups(void)
+void __init do_btb_flush_fixups(void)
{
long *start, *end;
@@ -621,10 +621,10 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
-static void do_final_fixups(void)
+static void __init do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
- struct ppc_inst inst;
+ ppc_inst_t inst;
u32 *src, *dest, *end;
if (PHYSICAL_START == 0)
@@ -715,12 +715,12 @@ late_initcall(check_features);
/* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
static struct fixup_entry fixup;
-static long calc_offset(struct fixup_entry *entry, unsigned int *p)
+static long __init calc_offset(struct fixup_entry *entry, unsigned int *p)
{
return (unsigned long)p - (unsigned long)entry;
}
-static void test_basic_patching(void)
+static void __init test_basic_patching(void)
{
extern unsigned int ftr_fixup_test1[];
extern unsigned int end_ftr_fixup_test1[];
@@ -751,7 +751,7 @@ static void test_basic_patching(void)
check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
}
-static void test_alternative_patching(void)
+static void __init test_alternative_patching(void)
{
extern unsigned int ftr_fixup_test2[];
extern unsigned int end_ftr_fixup_test2[];
@@ -784,7 +784,7 @@ static void test_alternative_patching(void)
check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
}
-static void test_alternative_case_too_big(void)
+static void __init test_alternative_case_too_big(void)
{
extern unsigned int ftr_fixup_test3[];
extern unsigned int end_ftr_fixup_test3[];
@@ -810,7 +810,7 @@ static void test_alternative_case_too_big(void)
check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
}
-static void test_alternative_case_too_small(void)
+static void __init test_alternative_case_too_small(void)
{
extern unsigned int ftr_fixup_test4[];
extern unsigned int end_ftr_fixup_test4[];
@@ -856,7 +856,7 @@ static void test_alternative_case_with_branch(void)
check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
}
-static void test_alternative_case_with_external_branch(void)
+static void __init test_alternative_case_with_external_branch(void)
{
extern unsigned int ftr_fixup_test6[];
extern unsigned int end_ftr_fixup_test6[];
@@ -866,7 +866,7 @@ static void test_alternative_case_with_external_branch(void)
check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
}
-static void test_alternative_case_with_branch_to_end(void)
+static void __init test_alternative_case_with_branch_to_end(void)
{
extern unsigned int ftr_fixup_test7[];
extern unsigned int end_ftr_fixup_test7[];
@@ -876,7 +876,7 @@ static void test_alternative_case_with_branch_to_end(void)
check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0);
}
-static void test_cpu_macros(void)
+static void __init test_cpu_macros(void)
{
extern u8 ftr_fixup_test_FTR_macros[];
extern u8 ftr_fixup_test_FTR_macros_expected[];
@@ -888,7 +888,7 @@ static void test_cpu_macros(void)
ftr_fixup_test_FTR_macros_expected, size) == 0);
}
-static void test_fw_macros(void)
+static void __init test_fw_macros(void)
{
#ifdef CONFIG_PPC64
extern u8 ftr_fixup_test_FW_FTR_macros[];
@@ -902,7 +902,7 @@ static void test_fw_macros(void)
#endif
}
-static void test_lwsync_macros(void)
+static void __init test_lwsync_macros(void)
{
extern u8 lwsync_fixup_test[];
extern u8 end_lwsync_fixup_test[];
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 86f49e3e7cf5..a94b0cd0bdc5 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1354,7 +1354,7 @@ static nokprobe_inline int trap_compare(long v1, long v2)
* otherwise.
*/
int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
#ifdef CONFIG_PPC64
unsigned int suffixopcode, prefixtype, prefix_r;
@@ -3578,7 +3578,7 @@ NOKPROBE_SYMBOL(emulate_loadstore);
* or -1 if the instruction is one that should not be stepped,
* such as an rfid, or a mtmsrd that would clear MSR_RI.
*/
-int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
{
struct instruction_op op;
int r, err, type;
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
new file mode 100644
index 000000000000..c44823292f73
--- /dev/null
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/code-patching.h>
+
+static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
+{
+ if (instr_is_branch_iform(ppc_inst_read(instr)) ||
+ instr_is_branch_bform(ppc_inst_read(instr)))
+ return branch_target(instr) == addr;
+
+ return 0;
+}
+
+static void __init test_trampoline(void)
+{
+ asm ("nop;nop;\n");
+}
+
+#define check(x) do { \
+ if (!(x)) \
+ pr_err("code-patching: test failed at line %d\n", __LINE__); \
+} while (0)
+
+static void __init test_branch_iform(void)
+{
+ int err;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned long addr = (unsigned long)tmp;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_iform(ppc_inst(0x48000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_iform(ppc_inst(0x4bffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0xcbffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0x7bffffff)));
+
+ /* Simplest case, branch to self with link */
+ check(instr_is_branch_iform(ppc_inst(0x48000001)));
+ /* All bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bfffffd)));
+ /* Some bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bff00fd)));
+ /* Must be a valid branch to start with */
+ check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
+
+ /* Absolute branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x48000103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute branch to 0x420fc */
+ ppc_inst_write(iptr, ppc_inst(0x480420ff));
+ check(instr_is_branch_to_addr(iptr, 0x420fc));
+ /* Maximum positive relative branch, + 20MB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x49fffffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
+ /* Smallest negative relative branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x4bfffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative branch, - 32 MB */
+ ppc_inst_write(iptr, ppc_inst(0x4a000000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Branch to self, with link */
+ err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100, with link */
+ err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100, no link */
+ err = create_branch(&instr, iptr, addr + 0x100, 0);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 MB */
+ err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Out of range relative negative offset, - 32 MB + 4*/
+ err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 MB */
+ err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
+ check(err);
+
+ /* Unaligned target */
+ err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
+}
+
+static void __init test_create_function_call(void)
+{
+ u32 *iptr;
+ unsigned long dest;
+ ppc_inst_t instr;
+
+ /* Check we can create a function call */
+ iptr = (u32 *)ppc_function_entry(test_trampoline);
+ dest = ppc_function_entry(test_create_function_call);
+ create_branch(&instr, iptr, dest, BRANCH_SET_LINK);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, dest));
+}
+
+static void __init test_branch_bform(void)
+{
+ int err;
+ unsigned long addr;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned int flags;
+
+ addr = (unsigned long)iptr;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_bform(ppc_inst(0x40000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_bform(ppc_inst(0x43ffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0xc3ffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
+
+ /* Absolute conditional branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x43ff0103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute conditional branch to 0x20fc */
+ ppc_inst_write(iptr, ppc_inst(0x43ff20ff));
+ check(instr_is_branch_to_addr(iptr, 0x20fc));
+ /* Maximum positive relative conditional branch, + 32 KB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43ff7ffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
+ /* Smallest negative relative conditional branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43fffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative conditional branch, - 32 KB */
+ ppc_inst_write(iptr, ppc_inst(0x43ff8000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* All condition code bits set & link */
+ flags = 0x3ff000 | BRANCH_SET_LINK;
+
+ /* Branch to self */
+ err = create_cond_branch(&instr, iptr, addr, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100 */
+ err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100 */
+ err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 KB */
+ err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* Out of range relative negative offset, - 32 KB + 4*/
+ err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 KB */
+ err = create_cond_branch(&instr, iptr, addr + 0x8000, flags);
+ check(err);
+
+ /* Unaligned target */
+ err = create_cond_branch(&instr, iptr, addr + 3, flags);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
+}
+
+static void __init test_translate_branch(void)
+{
+ unsigned long addr;
+ void *p, *q;
+ ppc_inst_t instr;
+ void *buf;
+
+ buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
+ check(buf);
+ if (!buf)
+ return;
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = p + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 MB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 0x2000000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000)));
+
+ /* Maximum positive case, move x to x - 32 MB + 4 */
+ p = buf + 0x2000000;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc)));
+
+ /* Jump to x + 16 MB moved to x + 20 MB */
+ p = buf;
+ addr = 0x1000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x1400000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 16 MB moved to x - 16 MB + 4 */
+ p = buf + 0x1000000;
+ addr = 0x2000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+
+ /* Conditional branch tests */
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 KB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 0x8000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000)));
+
+ /* Maximum positive case, move x to x - 32 KB + 4 */
+ p = buf + 0x8000;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc)));
+
+ /* Jump to x + 12 KB moved to x + 20 KB */
+ p = buf;
+ addr = 0x3000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x5000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 8 KB moved to x - 8 KB + 4 */
+ p = buf + 0x2000;
+ addr = 0x4000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Free the buffer we were using */
+ vfree(buf);
+}
+
+static void __init test_prefixed_patching(void)
+{
+ u32 *iptr = (u32 *)ppc_function_entry(test_trampoline);
+ u32 expected[2] = {OP_PREFIX << 26, 0};
+ ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0);
+
+ if (!IS_ENABLED(CONFIG_PPC64))
+ return;
+
+ patch_instruction(iptr, inst);
+
+ check(!memcmp(iptr, expected, sizeof(expected)));
+}
+
+static int __init test_code_patching(void)
+{
+ pr_info("Running code patching self-tests ...\n");
+
+ test_branch_iform();
+ test_branch_bform();
+ test_create_function_call();
+ test_translate_branch();
+ test_prefixed_patching();
+
+ return 0;
+}
+late_initcall(test_code_patching);
diff --git a/arch/powerpc/lib/test_code-patching.S b/arch/powerpc/lib/test_code-patching.S
deleted file mode 100644
index a9be6107844e..000000000000
--- a/arch/powerpc/lib/test_code-patching.S
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020 IBM Corporation
- */
-#include <asm/ppc-opcode.h>
-
- .text
-
-#define globl(x) \
- .globl x; \
-x:
-
-globl(code_patching_test1)
- nop
- nop
-globl(end_code_patching_test1)
-
-globl(code_patching_test1_expected)
- .long OP_PREFIX << 26
- .long 0x0000000
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 8b4f6b3e96c4..4f141daafcff 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -792,7 +792,7 @@ static void __init test_lxvpx_stxvpx(void)
#ifdef CONFIG_VSX
static void __init test_plxvp_pstxvp(void)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct pt_regs regs;
union {
vector128 a;
@@ -906,7 +906,7 @@ struct compute_test {
struct {
char *descr;
unsigned long flags;
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct pt_regs regs;
} subtests[MAX_SUBTESTS + 1];
};
@@ -1600,7 +1600,7 @@ static struct compute_test compute_tests[] = {
};
static int __init emulate_compute_instr(struct pt_regs *regs,
- struct ppc_inst instr,
+ ppc_inst_t instr,
bool negative)
{
int analysed;
@@ -1627,7 +1627,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs,
}
static int __init execute_compute_instr(struct pt_regs *regs,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
extern int exec_instr(struct pt_regs *regs);
@@ -1658,7 +1658,7 @@ static void __init run_tests_compute(void)
struct compute_test *test;
struct pt_regs *regs, exp, got;
unsigned int i, j, k;
- struct ppc_inst instr;
+ ppc_inst_t instr;
bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative;
for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
index 9ef941d958d8..5473f9d03df3 100644
--- a/arch/powerpc/lib/test_emulate_step_exec_instr.S
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -37,7 +37,7 @@ _GLOBAL(exec_instr)
* The stack pointer (GPR1) and the thread pointer (GPR13) are not
* saved as these should not be modified anyway.
*/
- SAVE_2GPRS(2, r1)
+ SAVE_GPRS(2, 3, r1)
SAVE_NVGPRS(r1)
/*
@@ -75,8 +75,7 @@ _GLOBAL(exec_instr)
/* Load GPRs from pt_regs */
REST_GPR(0, r31)
- REST_10GPRS(2, r31)
- REST_GPR(12, r31)
+ REST_GPRS(2, 12, r31)
REST_NVGPRS(r31)
/* Placeholder for the test instruction */
@@ -99,8 +98,7 @@ _GLOBAL(exec_instr)
subi r3, r3, GPR0
SAVE_GPR(0, r3)
SAVE_GPR(2, r3)
- SAVE_8GPRS(4, r3)
- SAVE_GPR(12, r3)
+ SAVE_GPRS(4, 12, r3)
SAVE_NVGPRS(r3)
/* Save resulting LR to pt_regs */
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 15f4773643d2..50dd8f6bdf46 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -9,5 +9,4 @@ endif
obj-y += mmu.o mmu_context.o
obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
-obj-$(CONFIG_PPC_KUEP) += kuep.o
obj-$(CONFIG_PPC_KUAP) += kuap.o
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
index 0f920f09af57..28676cabb005 100644
--- a/arch/powerpc/mm/book3s32/kuap.c
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -20,8 +20,11 @@ EXPORT_SYMBOL(kuap_unlock_all_ool);
void setup_kuap(bool disabled)
{
- if (!disabled)
+ if (!disabled) {
kuap_lock_all_ool();
+ init_mm.context.sr0 |= SR_KS;
+ current->thread.sr0 |= SR_KS;
+ }
if (smp_processor_id() != boot_cpuid)
return;
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
deleted file mode 100644
index c20733d6e02c..000000000000
--- a/arch/powerpc/mm/book3s32/kuep.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <asm/kup.h>
-#include <asm/smp.h>
-
-struct static_key_false disable_kuep_key;
-
-void setup_kuep(bool disabled)
-{
- if (!disabled)
- kuep_lock();
-
- if (smp_processor_id() != boot_cpuid)
- return;
-
- if (disabled)
- static_branch_enable(&disable_kuep_key);
- else
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-}
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 27061583a010..94045b265b6b 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-static int find_free_bat(void)
+static int __init find_free_bat(void)
{
int b;
int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -196,18 +196,17 @@ void mmu_mark_initmem_nx(void)
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
int i;
unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
- unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
+ unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K);
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
unsigned long size;
- for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
+ for (i = 0; i < nb - 1 && base < top;) {
size = block_size(base, top);
setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
base += size;
}
if (base < top) {
size = block_size(base, top);
- size = max(size, 128UL << 10);
if ((top - base) > size) {
size <<= 1;
if (strict_kernel_rwx_enabled() && base + size > border)
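
The mmu_mark_initmem_nx() hunk above rounds the end of kernel text up to the
128K IBAT granule instead of special-casing a small remainder. A tiny worked
example of that rounding (standalone C; the _etext offset is made up for
illustration):

/*
 * Illustrative sketch only: kernel ALIGN() rounds up to the given
 * power-of-two boundary, so an _etext offset of 0x7a3c40 becomes
 * 0x7c0000 when aligned to SZ_128K (0x20000).
 */
#include <stdio.h>

#define SZ_128K         0x20000UL
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long etext_offset = 0x7a3c40; /* hypothetical _etext - PAGE_OFFSET */

        printf("top = %#lx\n", ALIGN_UP(etext_offset, SZ_128K)); /* prints 0x7c0000 */
        return 0;
}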
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index e2708e387dc3..269a3eb25a73 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -69,6 +69,12 @@ EXPORT_SYMBOL_GPL(__init_new_context);
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
mm->context.id = __init_new_context();
+ mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0);
+
+ if (!kuep_is_disabled())
+ mm->context.sr0 |= SR_NX;
+ if (!kuap_is_disabled())
+ mm->context.sr0 |= SR_KS;
return 0;
}
@@ -108,20 +114,13 @@ void __init mmu_context_init(void)
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
{
long id = next->context.id;
- unsigned long val;
if (id < 0)
panic("mm_struct %p has no context ID", next);
isync();
- val = CTX_TO_VSID(id, 0);
- if (!kuep_is_disabled())
- val |= SR_NX;
- if (!kuap_is_disabled())
- val |= SR_KS;
-
- update_user_segments(val);
+ update_user_segments(next->context.sr0);
if (IS_ENABLED(CONFIG_BDI_SWITCH))
abatron_pteptrs[1] = next->pgd;
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 1b56d3af47d4..2d50cac499c5 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -2,20 +2,23 @@
ccflags-y := $(NO_MINIMAL_TOC)
+obj-y += mmu_context.o pgtable.o trace.o
+ifdef CONFIG_PPC_64S_HASH_MMU
CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-
-obj-y += hash_pgtable.o hash_utils.o slb.o \
- mmu_context.o pgtable.o hash_tlb.o
-obj-$(CONFIG_PPC_NATIVE) += hash_native.o
-obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
+obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o
+obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
-obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
+endif
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
endif
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
-obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o
obj-$(CONFIG_PPC_PKEY) += pkeys.o
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index d8279bfe68ea..623a7b7ab38b 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -43,110 +43,6 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
-{
- unsigned long rb;
-
- rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
-
- asm volatile("tlbiel %0" : : "r" (rb));
-}
-
-/*
- * tlbiel instruction for hash, set invalidation
- * i.e., r=1 and is=01 or is=10 or is=11
- */
-static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
- unsigned int pid,
- unsigned int ric, unsigned int prs)
-{
- unsigned long rb;
- unsigned long rs;
- unsigned int r = 0; /* hash format */
-
- rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
- rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
- : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
- : "memory");
-}
-
-
-static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
-{
- unsigned int set;
-
- asm volatile("ptesync": : :"memory");
-
- for (set = 0; set < num_sets; set++)
- tlbiel_hash_set_isa206(set, is);
-
- ppc_after_tlbiel_barrier();
-}
-
-static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
-{
- unsigned int set;
-
- asm volatile("ptesync": : :"memory");
-
- /*
- * Flush the partition table cache if this is HV mode.
- */
- if (early_cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_hash_set_isa300(0, is, 0, 2, 0);
-
- /*
- * Now invalidate the process table cache. UPRT=0 HPT modes (what
- * current hardware implements) do not use the process table, but
- * add the flushes anyway.
- *
- * From ISA v3.0B p. 1078:
- * The following forms are invalid.
- * * PRS=1, R=0, and RIC!=2 (The only process-scoped
- * HPT caching is of the Process Table.)
- */
- tlbiel_hash_set_isa300(0, is, 0, 2, 1);
-
- /*
- * Then flush the sets of the TLB proper. Hash mode uses
- * partition scoped TLB translations, which may be flushed
- * in !HV mode.
- */
- for (set = 0; set < num_sets; set++)
- tlbiel_hash_set_isa300(set, is, 0, 0, 0);
-
- ppc_after_tlbiel_barrier();
-
- asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
-}
-
-void hash__tlbiel_all(unsigned int action)
-{
- unsigned int is;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- is = 3;
- break;
- case TLB_INVAL_SCOPE_LPID:
- is = 2;
- break;
- default:
- BUG();
- }
-
- if (early_cpu_has_feature(CPU_FTR_ARCH_300))
- tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
- else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
- tlbiel_all_isa206(POWER8_TLB_SETS, is);
- else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
- tlbiel_all_isa206(POWER7_TLB_SETS, is);
- else
- WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
-}
-
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
@@ -267,7 +163,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
va |= ssize << 8;
sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
- asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1)
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1)
: : "r" (va), "i" (CPU_FTR_ARCH_206)
: "memory");
break;
@@ -286,7 +182,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
*/
va |= (vpn & 0xfe);
va |= 1; /* L */
- asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1)
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1)
: : "r" (va), "i" (CPU_FTR_ARCH_206)
: "memory");
break;
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index ad5eff097d31..7ce8914992e3 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -16,7 +16,6 @@
#include <mm/mmu_decl.h>
-#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index cfd45245d009..7abf82a698d3 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -99,8 +99,6 @@
*/
static unsigned long _SDR1;
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
-EXPORT_SYMBOL_GPL(mmu_psize_defs);
u8 hpte_page_sizes[1 << LP_BITS];
EXPORT_SYMBOL_GPL(hpte_page_sizes);
@@ -114,9 +112,6 @@ EXPORT_SYMBOL_GPL(mmu_linear_psize);
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int mmu_vmemmap_psize = MMU_PAGE_4K;
-#endif
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
@@ -175,6 +170,110 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
},
};
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+ unsigned long rb;
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+ asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+ unsigned int r = 0; /* hash format */
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
+ : "memory");
+}
+
+
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa206(set, is);
+
+ ppc_after_tlbiel_barrier();
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the partition table cache if this is HV mode.
+ */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+
+ /*
+ * Now invalidate the process table cache. UPRT=0 HPT modes (what
+ * current hardware implements) do not use the process table, but
+ * add the flushes anyway.
+ *
+ * From ISA v3.0B p. 1078:
+ * The following forms are invalid.
+ * * PRS=1, R=0, and RIC!=2 (The only process-scoped
+ * HPT caching is of the Process Table.)
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+ /*
+ * Then flush the sets of the TLB proper. Hash mode uses
+ * partition scoped TLB translations, which may be flushed
+ * in !HV mode.
+ */
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+ ppc_after_tlbiel_barrier();
+
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+void hash__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ tlbiel_all_isa206(POWER8_TLB_SETS, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
+ tlbiel_all_isa206(POWER7_TLB_SETS, is);
+ else
+ WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+}
+
/*
* 'R' and 'C' update notes:
* - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
@@ -563,7 +662,7 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
}
#endif /* CONFIG_HUGETLB_PAGE */
-static void mmu_psize_set_default_penc(void)
+static void __init mmu_psize_set_default_penc(void)
{
int bpsize, apsize;
for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
@@ -573,7 +672,7 @@ static void mmu_psize_set_default_penc(void)
#ifdef CONFIG_PPC_64K_PAGES
-static bool might_have_hea(void)
+static bool __init might_have_hea(void)
{
/*
* The HEA ethernet adapter requires awareness of the
@@ -644,7 +743,7 @@ static void __init htab_scan_page_sizes(void)
* low-order N bits as the encoding for the 2^(12+N) byte page size
* (if it exists).
*/
-static void init_hpte_page_sizes(void)
+static void __init init_hpte_page_sizes(void)
{
long int ap, bp;
long int shift, penc;
@@ -1091,7 +1190,7 @@ void __init hash__early_init_mmu(void)
ps3_early_mm_init();
else if (firmware_has_feature(FW_FEATURE_LPAR))
hpte_init_pseries();
- else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+ else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE))
hpte_init_native();
if (!mmu_hash_ops.hpte_insert)
diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c
index a688e1324ae5..ea8f83afb0ae 100644
--- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/hugetlbpage.c
@@ -16,6 +16,7 @@
unsigned int hpage_shift;
EXPORT_SYMBOL(hpage_shift);
+#ifdef CONFIG_PPC_64S_HASH_MMU
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, unsigned int shift, unsigned int mmu_psize)
@@ -122,6 +123,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
+#endif
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
@@ -148,7 +150,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
-void hugetlbpage_init_default(void)
+void __init hugetlbpage_init_default(void)
{
/* Set default large page size. Currently, we pick 16M or 1M
* depending on what is available
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index c10fc8a72fb3..c766e4c26e42 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -31,7 +31,8 @@ static int alloc_context_id(int min_id, int max_id)
return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}
-void hash__reserve_context_id(int id)
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void __init hash__reserve_context_id(int id)
{
int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);
@@ -50,7 +51,9 @@ int hash__alloc_context_id(void)
return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
static int realloc_context_ids(mm_context_t *ctx)
{
int i, id;
@@ -150,6 +153,13 @@ void hash__setup_new_exec(void)
slb_setup_new_exec();
}
+#else
+static inline int hash__init_new_context(struct mm_struct *mm)
+{
+ BUILD_BUG();
+ return 0;
+}
+#endif
static int radix__init_new_context(struct mm_struct *mm)
{
@@ -175,7 +185,9 @@ static int radix__init_new_context(struct mm_struct *mm)
*/
asm volatile("ptesync;isync" : : : "memory");
+#ifdef CONFIG_PPC_64S_HASH_MMU
mm->context.hash_context = NULL;
+#endif
return index;
}
@@ -213,14 +225,22 @@ EXPORT_SYMBOL_GPL(__destroy_context);
static void destroy_contexts(mm_context_t *ctx)
{
- int index, context_id;
+ if (radix_enabled()) {
+ ida_free(&mmu_context_ida, ctx->id);
+ } else {
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ int index, context_id;
- for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
- context_id = ctx->extended_id[index];
- if (context_id)
- ida_free(&mmu_context_ida, context_id);
+ for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+ context_id = ctx->extended_id[index];
+ if (context_id)
+ ida_free(&mmu_context_ida, context_id);
+ }
+ kfree(ctx->hash_context);
+#else
+ BUILD_BUG(); // radix_enabled() should be constant true
+#endif
}
- kfree(ctx->hash_context);
}
static void pmd_frag_destroy(void *pmd_frag)
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 9e16c7b1a6c5..79ce3c22a29d 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -22,6 +22,13 @@
#include "internal.h"
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
+
unsigned long __pmd_frag_nr;
EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
@@ -207,17 +214,12 @@ void __init mmu_partition_table_init(void)
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
unsigned long ptcr;
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
/* Initialize the Partition Table with no entries */
partition_tb = memblock_alloc(patb_size, patb_size);
if (!partition_tb)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, patb_size, patb_size);
- /*
- * update partition table control register,
- * 64 K size.
- */
ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
set_ptcr_when_no_uv(ptcr);
powernv_set_nmmu_ptcr(ptcr);
@@ -526,3 +528,23 @@ static int __init pgtable_debugfs_setup(void)
return 0;
}
arch_initcall(pgtable_debugfs_setup);
+
+#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN)
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+*/
+unsigned long memremap_compat_align(void)
+{
+ if (!radix_enabled()) {
+ unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+ return max(SUBSECTION_SIZE, 1UL << shift);
+ }
+
+ return SUBSECTION_SIZE;
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
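
A minimal usage sketch for the memremap_compat_align() override moved above (illustrative only, not part of this patch): a hypothetical ZONE_DEVICE caller would consult it to validate mapping alignment; pmem_range_aligned() is an invented name for illustration.

/* Hypothetical helper, not introduced by this patch: shows how the
 * relocated memremap_compat_align() bounds ZONE_DEVICE alignment, which
 * on hash resolves to the linear-mapping page size from mmu_psize_defs[]. */
#include <linux/align.h>
#include <linux/memremap.h>
#include <linux/types.h>

static bool pmem_range_aligned(u64 start, u64 size)
{
	unsigned long align = memremap_compat_align();

	return IS_ALIGNED(start, align) && IS_ALIGNED(size, align);
}
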
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index a2d9ad138709..753e62ba67af 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -66,7 +66,7 @@ static int __init dt_scan_storage_keys(unsigned long node,
return 1;
}
-static int scan_pkey_feature(void)
+static int __init scan_pkey_feature(void)
{
int ret;
int pkeys_total = 0;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 3a600bd7fbc6..def04631a74d 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -33,7 +33,6 @@
#include <trace/events/thp.h>
-unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
unsigned long radix_mem_block_size __ro_after_init;
@@ -335,7 +334,7 @@ static void __init radix_init_pgtable(void)
u64 i;
/* We don't support slb for radix */
- mmu_slb_size = 0;
+ slb_set_size(0);
/*
* Create the linear mapping
@@ -357,18 +356,13 @@ static void __init radix_init_pgtable(void)
-1, PAGE_KERNEL));
}
- /* Find out how many PID bits are supported */
if (!cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
 		 * Older versions of KVM on these machines prefer if the
* guest only uses the low 19 PID bits.
*/
- if (!mmu_pid_bits)
- mmu_pid_bits = 19;
- } else {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
+ mmu_pid_bits = 19;
}
mmu_base_pid = 1;
@@ -449,11 +443,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
- /* Find MMU PID size */
- prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
- if (prop && size == 4)
- mmu_pid_bits = be32_to_cpup(prop);
-
/* Grab page size encodings */
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
if (!prop)
@@ -510,7 +499,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
return 1;
}
-static unsigned long radix_memory_block_size(void)
+static unsigned long __init radix_memory_block_size(void)
{
unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
@@ -528,7 +517,7 @@ static unsigned long radix_memory_block_size(void)
#else /* CONFIG_MEMORY_HOTPLUG */
-static unsigned long radix_memory_block_size(void)
+static unsigned long __init radix_memory_block_size(void)
{
return 1UL * 1024 * 1024 * 1024;
}
@@ -572,22 +561,11 @@ void __init radix__early_init_devtree(void)
return;
}
-static void radix_init_amor(void)
-{
- /*
- * In HV mode, we init AMOR (Authority Mask Override Register) so that
- * the hypervisor and guest can setup IAMR (Instruction Authority Mask
- * Register), enable key 0 and set it to 1.
- *
- * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
- */
- mtspr(SPRN_AMOR, (3ul << 62));
-}
-
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
+#ifdef CONFIG_PPC_64S_HASH_MMU
#ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
mmu_virtual_psize = MMU_PAGE_64K;
@@ -605,6 +583,7 @@ void __init radix__early_init_mmu(void)
} else
mmu_vmemmap_psize = mmu_virtual_psize;
#endif
+#endif
/*
* initialize page table size
*/
@@ -644,7 +623,6 @@ void __init radix__early_init_mmu(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
- radix_init_amor();
} else {
radix_init_pseries();
}
@@ -668,8 +646,6 @@ void radix__early_init_mmu_secondary(void)
set_ptcr_when_no_uv(__pa(partition_tb) |
(PATB_SIZE_SHIFT - 12));
-
- radix_init_amor();
}
radix__switch_mmu_context(NULL, &init_mm);
@@ -1100,7 +1076,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
int pud_clear_huge(pud_t *pud)
{
- if (pud_huge(*pud)) {
+ if (pud_is_leaf(*pud)) {
pud_clear(pud);
return 1;
}
@@ -1147,7 +1123,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
int pmd_clear_huge(pmd_t *pmd)
{
- if (pmd_huge(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
pmd_clear(pmd);
return 1;
}
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index f0037bcc47a0..31f4cef3adac 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -868,19 +868,3 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
return err;
}
}
-
-DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
-{
- int err = regs->result;
-
- if (err == -EFAULT) {
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
- else
- bad_page_fault(regs, SIGSEGV);
- } else if (err == -EINVAL) {
- unrecoverable_exception(regs);
- } else {
- BUG();
- }
-}
diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c
new file mode 100644
index 000000000000..b86e7b906257
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/trace.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file is for defining trace points and trace related helpers.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+#endif
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 8acd00178956..c1cb21a00884 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -82,6 +82,7 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
+#ifdef CONFIG_PPC_64S_HASH_MMU
int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
{
u64 vsid, vsidkey;
@@ -146,3 +147,4 @@ void copro_flush_all_slbs(struct mm_struct *mm)
cxl_slbia(mm);
}
EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
+#endif
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a8d0ce85d39a..2d4a411c7c85 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -35,6 +35,7 @@
#include <linux/kfence.h>
#include <linux/pkeys.h>
+#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/interrupt.h>
#include <asm/page.h>
@@ -620,4 +621,27 @@ DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
{
bad_page_fault(regs, SIGSEGV);
}
+
+/*
+ * In radix, segment interrupts indicate the EA is not addressable by the
+ * page table geometry, so they are always sent here.
+ *
+ * In hash, this is called if do_slb_fault returns error. Typically it is
+ * because the EA was outside the region allowed by software.
+ */
+DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
+{
+ int err = regs->result;
+
+ if (err == -EFAULT) {
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+ else
+ bad_page_fault(regs, SIGSEGV);
+ } else if (err == -EINVAL) {
+ unrecoverable_exception(regs);
+ } else {
+ BUG();
+ }
+}
#endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 82d8b368ca6d..ddead41e2194 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -542,20 +542,26 @@ retry:
return page;
}
-#ifdef CONFIG_PPC_MM_SLICES
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static inline int file_to_psize(struct file *file)
+{
+ struct hstate *hstate = hstate_file(file);
+ return shift_to_mmu_psize(huge_page_shift(hstate));
+}
+
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- struct hstate *hstate = hstate_file(file);
- int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
-
#ifdef CONFIG_PPC_RADIX_MMU
if (radix_enabled())
return radix__hugetlb_get_unmapped_area(file, addr, len,
pgoff, flags);
#endif
- return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
+#ifdef CONFIG_PPC_MM_SLICES
+ return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
+#endif
+ BUG();
}
#endif
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 3a82f89827a5..119ef491f797 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -20,6 +20,7 @@
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/kup.h>
+#include <asm/smp.h>
phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
EXPORT_SYMBOL_GPL(memstart_addr);
@@ -33,6 +34,9 @@ bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
static int __init parse_nosmep(char *p)
{
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ return 0;
+
disable_kuep = true;
pr_warn("Disabling Kernel Userspace Execution Prevention\n");
return 0;
@@ -47,6 +51,23 @@ static int __init parse_nosmap(char *p)
}
early_param("nosmap", parse_nosmap);
+void __weak setup_kuep(bool disabled)
+{
+ if (!IS_ENABLED(CONFIG_PPC_KUEP) || disabled)
+ return;
+
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+}
+
+void setup_kup(void)
+{
+ setup_kuap(disable_kuap);
+ setup_kuep(disable_kuep);
+}
+
#define CTOR(shift) static void ctor_##shift(void *addr) \
{ \
memset(addr, 0, sizeof(void *) << (shift)); \
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 386be136026e..35f46bf54281 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -370,6 +370,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#ifdef CONFIG_PPC_BOOK3S_64
+unsigned int mmu_lpid_bits;
+unsigned int mmu_pid_bits;
+
static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
static int __init parse_disable_radix(char *p)
@@ -437,11 +440,56 @@ static void __init early_check_vec5(void)
}
}
+static int __init dt_scan_mmu_pid_width(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ int size = 0;
+ const __be32 *prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ /* Find MMU LPID, PID register size */
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-lpid-bits", &size);
+ if (prop && size == 4)
+ mmu_lpid_bits = be32_to_cpup(prop);
+
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
+ if (prop && size == 4)
+ mmu_pid_bits = be32_to_cpup(prop);
+
+ if (!mmu_pid_bits && !mmu_lpid_bits)
+ return 0;
+
+ return 1;
+}
+
void __init mmu_early_init_devtree(void)
{
+ bool hvmode = !!(mfmsr() & MSR_HV);
+
/* Disable radix mode based on kernel command line. */
- if (disable_radix)
- cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ if (disable_radix) {
+ if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ else
+ pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+ }
+
+ of_scan_flat_dt(dt_scan_mmu_pid_width, NULL);
+ if (hvmode && !mmu_lpid_bits) {
+ if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ mmu_lpid_bits = 12; /* POWER8-10 */
+ else
+ mmu_lpid_bits = 10; /* POWER7 */
+ }
+ if (!mmu_pid_bits) {
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ mmu_pid_bits = 20; /* POWER9-10 */
+ }
/*
* Check /chosen/ibm,architecture-vec-5 if running as a guest.
@@ -449,11 +497,12 @@ void __init mmu_early_init_devtree(void)
* even though the ibm,architecture-vec-5 property created by
* skiboot doesn't have the necessary bits set.
*/
- if (!(mfmsr() & MSR_HV))
+ if (!hvmode)
early_check_vec5();
if (early_radix_enabled()) {
radix__early_init_devtree();
+
/*
* We have finalized the translation we are going to use by now.
* Radix mode is not limited by RMA / VRMA addressing.
@@ -463,5 +512,9 @@ void __init mmu_early_init_devtree(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
} else
hash__early_init_devtree();
+
+ if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) &&
+ !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX))
+ panic("kernel does not support any MMU type offered by platform");
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
index 57342154d2b0..4f12504fb405 100644
--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -98,23 +98,3 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
return NULL;
}
-
-#ifdef CONFIG_ZONE_DEVICE
-/*
- * Override the generic version in mm/memremap.c.
- *
- * With hash translation, the direct-map range is mapped with just one
- * page size selected by htab_init_page_sizes(). Consult
- * mmu_psize_defs[] to determine the minimum page size alignment.
-*/
-unsigned long memremap_compat_align(void)
-{
- unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
-
- if (radix_enabled())
- return SUBSECTION_SIZE;
- return max(SUBSECTION_SIZE, 1UL << shift);
-
-}
-EXPORT_SYMBOL_GPL(memremap_compat_align);
-#endif
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
index 202bd260a009..35b287b0a8da 100644
--- a/arch/powerpc/mm/kasan/book3s_32.c
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -19,7 +19,8 @@ int __init kasan_init_region(void *start, size_t size)
block = memblock_alloc(k_size, k_size_base);
if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
- int k_size_more = 1 << (ffs(k_size - k_size_base) - 1);
+ int shift = ffs(k_size - k_size_base);
+ int k_size_more = shift ? 1 << (shift - 1) : 0;
setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
if (k_size_more >= SZ_128K)
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index aad7c47e0030..ea821d0ffe16 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
return is_kernel_addr((unsigned long)unsafe_src);
}
-
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src)
-{
- unsigned int val, suffix;
- int err;
-
- err = copy_from_kernel_nofault(&val, src, sizeof(val));
- if (err)
- return err;
- if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
- err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix));
- *inst = ppc_inst_prefix(val, suffix);
- } else {
- *inst = ppc_inst(val);
- }
- return err;
-}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index bd5d91a31183..8e301cd8925b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -26,7 +26,6 @@
#include <mm/mmu_decl.h>
unsigned long long memory_limit;
-bool init_mem_is_free;
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -312,7 +311,6 @@ void free_initmem(void)
{
ppc_md.progress = ppc_printk_progress;
mark_initmem_nx();
- init_mem_is_free = true;
free_initmem_default(POISON_FREE_INITMEM);
}
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index ae683fdc716c..c475cf810aa8 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -80,6 +80,7 @@ static inline unsigned long mmap_base(unsigned long rnd,
return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
}
+#ifdef HAVE_ARCH_UNMAPPED_AREA
#ifdef CONFIG_PPC_RADIX_MMU
/*
* Same function as generic code used only for radix, because we don't need to overload
@@ -181,11 +182,42 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
*/
return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
}
+#endif
+
+unsigned long arch_get_unmapped_area(struct file *filp,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags)
+{
+#ifdef CONFIG_PPC_MM_SLICES
+ return slice_get_unmapped_area(addr, len, flags,
+ mm_ctx_user_psize(&current->mm->context), 0);
+#else
+ BUG();
+#endif
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+ const unsigned long addr0,
+ const unsigned long len,
+ const unsigned long pgoff,
+ const unsigned long flags)
+{
+#ifdef CONFIG_PPC_MM_SLICES
+ return slice_get_unmapped_area(addr0, len, flags,
+ mm_ctx_user_psize(&current->mm->context), 1);
+#else
+ BUG();
+#endif
+}
+#endif /* HAVE_ARCH_UNMAPPED_AREA */
static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
unsigned long random_factor,
struct rlimit *rlim_stack)
{
+#ifdef CONFIG_PPC_RADIX_MMU
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = radix__arch_get_unmapped_area;
@@ -193,13 +225,9 @@ static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
}
-}
-#else
-/* dummy */
-extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
- unsigned long random_factor,
- struct rlimit *rlim_stack);
#endif
+}
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 74246536b832..1fb9c99f8679 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -18,6 +18,12 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
{
/* 32-bit keeps track of the current PGDIR in the thread struct */
tsk->thread.pgdir = mm->pgd;
+#ifdef CONFIG_PPC_BOOK3S_32
+ tsk->thread.sr0 = mm->context.sr0;
+#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ tsk->thread.pid = mm->context.id;
+#endif
}
#elif defined(CONFIG_PPC_BOOK3E_64)
static inline void switch_mm_pgdir(struct task_struct *tsk,
@@ -25,6 +31,9 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
{
/* 64-bit Book3E keeps track of current PGD in the PACA */
get_paca()->pgd = mm->pgd;
+#ifdef CONFIG_PPC_KUAP
+ tsk->thread.pid = mm->context.id;
+#endif
}
#else
static inline void switch_mm_pgdir(struct task_struct *tsk,
@@ -81,7 +90,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* context
*/
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- asm volatile ("dssall");
+ asm volatile (PPC_DSSALL);
if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
index e079f26b267e..1beae802bb1c 100644
--- a/arch/powerpc/mm/nohash/44x.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -38,7 +38,7 @@ int icache_44x_need_flush;
unsigned long tlb_47x_boltmap[1024/8];
-static void ppc44x_update_tlb_hwater(void)
+static void __init ppc44x_update_tlb_hwater(void)
{
 	/* The TLB miss handlers hard code the watermark in a cmpli
 	 * instruction to improve performance rather than loading it
@@ -122,7 +122,7 @@ static void __init ppc47x_update_boltmap(void)
/*
* "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
*/
-static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
+static void __init ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
{
unsigned int rA;
int bolted;
@@ -240,19 +240,3 @@ void __init mmu_init_secondary(int cpu)
}
}
#endif /* CONFIG_SMP */
-
-#ifdef CONFIG_PPC_KUEP
-void setup_kuep(bool disabled)
-{
- if (smp_processor_id() != boot_cpuid)
- return;
-
- if (disabled)
- patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP()));
- else
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-
- if (IS_ENABLED(CONFIG_PPC_47x) && disabled)
- patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP()));
-}
-#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 0df9fe29dd56..27f9186ae374 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -8,11 +8,7 @@
*/
#include <linux/memblock.h>
-#include <linux/mmu_context.h>
#include <linux/hugetlb.h>
-#include <asm/fixmap.h>
-#include <asm/code-patching.h>
-#include <asm/inst.h>
#include <mm/mmu_decl.h>
@@ -212,35 +208,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
}
-#ifdef CONFIG_PPC_KUEP
-void __init setup_kuep(bool disabled)
-{
- if (disabled)
- return;
-
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-
- mtspr(SPRN_MI_AP, MI_APG_KUEP);
-}
-#endif
-
-#ifdef CONFIG_PPC_KUAP
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
-void __init setup_kuap(bool disabled)
-{
- if (disabled) {
- static_branch_enable(&disable_kuap_key);
- return;
- }
-
- pr_info("Activating Kernel Userspace Access Protection\n");
-
- mtspr(SPRN_MD_AP, MD_APG_KUAP);
-}
-#endif
-
int pud_clear_huge(pud_t *pud)
{
return 0;
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index b1f630d423d8..b467a25ee155 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -2,7 +2,7 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-obj-y += mmu_context.o tlb.o tlb_low.o
+obj-y += mmu_context.o tlb.o tlb_low.o kup.o
obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
obj-$(CONFIG_40x) += 40x.o
obj-$(CONFIG_44x) += 44x.o
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 77884e24281d..7d4368d055a6 100644
--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -10,6 +10,7 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/dma.h>
+#include <asm/code-patching.h>
#include <mm/mmu_decl.h>
@@ -115,3 +116,17 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
smp_wmb();
return 0;
}
+
+void __patch_exception(int exc, unsigned long addr)
+{
+ unsigned int *ibase = &interrupt_base_book3e;
+
+ /*
+	 * Our exception vectors start with a NOP and -then- a branch
+ * to deal with single stepping from userspace which stops on
+ * the second instruction. Thus we need to patch the second
+ * instruction of the exception, not the first one.
+ */
+
+ patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c
index b231a54f540c..dfe715e0f70a 100644
--- a/arch/powerpc/mm/nohash/fsl_book3e.c
+++ b/arch/powerpc/mm/nohash/fsl_book3e.c
@@ -60,11 +60,6 @@ struct tlbcamrange {
phys_addr_t phys;
} tlbcam_addrs[NUM_TLBCAMS];
-unsigned long tlbcam_sz(int idx)
-{
- return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
-}
-
#ifdef CONFIG_FSL_BOOKE
/*
* Return PA for this VA if it is mapped by a CAM, or 0
@@ -264,6 +259,11 @@ void __init MMU_init_hw(void)
flush_instruction_cache();
}
+static unsigned long __init tlbcam_sz(int idx)
+{
+ return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
+}
+
void __init adjust_total_lowmem(void)
{
unsigned long ram;
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
new file mode 100644
index 000000000000..552becf90e97
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing kernel userspace protection
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_PPC_KUAP
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
+void setup_kuap(bool disabled)
+{
+ if (disabled) {
+ if (IS_ENABLED(CONFIG_40x))
+ disable_kuep = true;
+ if (smp_processor_id() == boot_cpuid)
+ static_branch_enable(&disable_kuap_key);
+ return;
+ }
+
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ __prevent_user_access(KUAP_READ_WRITE);
+}
+#endif
diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c
index 44b2b5e7cabe..85b048f04c56 100644
--- a/arch/powerpc/mm/nohash/mmu_context.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -33,6 +33,7 @@
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/smp.h>
+#include <asm/kup.h>
#include <mm/mmu_decl.h>
@@ -217,7 +218,7 @@ static void set_context(unsigned long id, pgd_t *pgd)
/* sync */
mb();
- } else {
+ } else if (kuap_is_disabled()) {
if (IS_ENABLED(CONFIG_40x))
mb(); /* sync */
@@ -305,6 +306,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
if (IS_ENABLED(CONFIG_BDI_SWITCH))
abatron_pteptrs[1] = next->pgd;
set_context(id, next->pgd);
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ tsk->thread.pid = id;
+#endif
raw_spin_unlock(&context_lock);
}
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 647bf454a0fa..fd2c77af5c55 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -150,7 +150,6 @@ static inline int mmu_get_tsize(int psize)
*/
#ifdef CONFIG_PPC64
-int mmu_linear_psize; /* Page size used for the linear mapping */
int mmu_pte_psize; /* Page size used for PTE pages */
int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
@@ -433,7 +432,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
}
}
-static void setup_page_sizes(void)
+static void __init setup_page_sizes(void)
{
unsigned int tlb0cfg;
unsigned int tlb0ps;
@@ -571,7 +570,7 @@ out:
}
}
-static void setup_mmu_htw(void)
+static void __init setup_mmu_htw(void)
{
/*
* If we want to use HW tablewalk, enable it by patching the TLB miss
@@ -657,14 +656,6 @@ static void early_init_this_mmu(void)
static void __init early_init_mmu_global(void)
{
- /* XXX This will have to be decided at runtime, but right
- * now our boot and TLB miss code hard wires it. Ideally
- * we should find out a suitable page size and patch the
- * TLB miss code (either that or use the PACA to store
- * the value we want)
- */
- mmu_linear_psize = MMU_PAGE_1G;
-
/* XXX This should be decided at runtime based on supported
* page sizes in the TLB, but for now let's assume 16M is
* always there and a good fit (which it probably is)
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 9235e720e357..8b97c4acfebf 100644
--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -128,6 +128,13 @@ END_BTB_FLUSH_SECTION
bne tlb_miss_kernel_bolted
+tlb_miss_user_bolted:
+#ifdef CONFIG_PPC_KUAP
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- tlb_miss_fault_bolted /* KUAP fault */
+#endif
+
tlb_miss_common_bolted:
/*
* This is the guts of the TLB miss handler for bolted-linear.
@@ -246,7 +253,7 @@ itlb_miss_fault_bolted:
cmpldi cr0,r15,0 /* Check for user region */
oris r11,r11,_PAGE_ACCESSED@h
- beq tlb_miss_common_bolted
+ beq tlb_miss_user_bolted
b itlb_miss_kernel_bolted
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -676,6 +683,11 @@ finish_normal_tlb_miss:
/* Check if required permissions are met */
andc. r15,r11,r14
bne- normal_tlb_miss_access_fault
+#ifdef CONFIG_PPC_KUAP
+ mfspr r11,SPRN_MAS1
+ rlwinm. r10,r11,0,0x3fff0000
+ beq- normal_tlb_miss_access_fault /* KUAP fault */
+#endif
/* Now we build the MAS:
*
@@ -689,15 +701,17 @@ finish_normal_tlb_miss:
*
* TODO: mix up code below for better scheduling
*/
- clrrdi r11,r16,12 /* Clear low crap in EA */
- rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */
- mtspr SPRN_MAS2,r11
+ clrrdi r10,r16,12 /* Clear low crap in EA */
+ rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */
+ mtspr SPRN_MAS2,r10
/* Check page size, if not standard, update MAS1 */
- rldicl r11,r14,64-8,64-8
- cmpldi cr0,r11,BOOK3E_PAGESZ_4K
+ rldicl r10,r14,64-8,64-8
+ cmpldi cr0,r10,BOOK3E_PAGESZ_4K
beq- 1f
+#ifndef CONFIG_PPC_KUAP
mfspr r11,SPRN_MAS1
+#endif
rlwimi r11,r14,31,21,24
rlwinm r11,r11,0,21,19
mtspr SPRN_MAS1,r11
@@ -786,7 +800,16 @@ virt_page_table_tlb_miss:
mfspr r10,SPRN_MAS1
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
+#ifdef CONFIG_PPC_KUAP
+ b 2f
+1:
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- virt_page_table_tlb_miss_fault /* KUAP fault */
+2:
+#else
1:
+#endif
BEGIN_MMU_FTR_SECTION
/* Search if we already have a TLB entry for that virtual address, and
* if we do, bail out.
@@ -1027,6 +1050,11 @@ virt_page_table_tlb_miss_whacko_fault:
* avoid too much complication, it will save/restore things for us
*/
htw_tlb_miss:
+#ifdef CONFIG_PPC_KUAP
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- htw_tlb_miss_fault /* KUAP fault */
+#endif
/* Search if we already have a TLB entry for that virtual address, and
* if we do, bail out.
*
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 59d3cfcd7887..9d5f710d2c20 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -134,7 +134,7 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
return 0;
}
-static void reset_numa_cpu_lookup_table(void)
+static void __init reset_numa_cpu_lookup_table(void)
{
unsigned int cpu;
@@ -372,7 +372,7 @@ void update_numa_distance(struct device_node *node)
* ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
* ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
*/
-static void initialize_form2_numa_distance_lookup_table(void)
+static void __init initialize_form2_numa_distance_lookup_table(void)
{
int i, j;
struct device_node *root;
@@ -581,7 +581,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
return 0;
}
-static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
+static int __init get_nid_and_numa_distance(struct drmem_lmb *lmb)
{
struct assoc_arrays aa = { .arrays = NULL };
int default_nid = NUMA_NO_NODE;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index ce9482383144..abb3198bd277 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -81,9 +81,6 @@ static struct page *maybe_pte_to_page(pte_t pte)
static pte_t set_pte_filter_hash(pte_t pte)
{
- if (radix_enabled())
- return pte;
-
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
cpu_has_feature(CPU_FTR_NOEXECUTE))) {
@@ -112,6 +109,9 @@ static inline pte_t set_pte_filter(pte_t pte)
{
struct page *pg;
+ if (radix_enabled())
+ return pte;
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return set_pte_filter_hash(pte);
@@ -144,6 +144,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
{
struct page *pg;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ return pte;
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return pte;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 78c8cf01db5f..175aabf101e8 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -102,7 +102,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift);
struct page *p4d_page(p4d_t p4d)
{
if (p4d_is_leaf(p4d)) {
- VM_WARN_ON(!p4d_huge(p4d));
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!p4d_huge(p4d));
return pte_page(p4d_pte(p4d));
}
return virt_to_page(p4d_pgtable(p4d));
@@ -112,7 +113,8 @@ struct page *p4d_page(p4d_t p4d)
struct page *pud_page(pud_t pud)
{
if (pud_is_leaf(pud)) {
- VM_WARN_ON(!pud_huge(pud));
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!pud_huge(pud));
return pte_page(pud_pte(pud));
}
return virt_to_page(pud_pgtable(pud));
@@ -125,7 +127,13 @@ struct page *pud_page(pud_t pud)
struct page *pmd_page(pmd_t pmd)
{
if (pmd_is_leaf(pmd)) {
- VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
+ /*
+ * vmalloc_to_page may be called on any vmap address (not only
+		 * vmalloc), and it uses pmd_page() etc. when huge vmap is
+		 * enabled, so these checks can't be used.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
return pte_page(pmd_pte(pmd));
}
return virt_to_page(pmd_page_vaddr(pmd));
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index 4050cbb55acf..b533caaf0910 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -10,5 +10,5 @@ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o
ifdef CONFIG_PTDUMP_DEBUGFS
obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o
-obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o
+obj-$(CONFIG_PPC_64S_HASH_MMU) += hashpagetable.o
endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 32bfb215c485..8c846982766f 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -123,7 +123,7 @@ static struct ptdump_range ptdump_range[] __ro_after_init = {
void pt_dump_size(struct seq_file *m, unsigned long size)
{
- static const char units[] = "KMGTPE";
+ static const char units[] = " KMGTPE";
const char *unit = units;
/* Work out what appropriate unit to use */
@@ -176,7 +176,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
- pt_dump_size(st->seq, (addr - st->start_address) >> 10);
+ pt_dump_size(st->seq, addr - st->start_address);
}
static void note_prot_wx(struct pg_state *st, unsigned long addr)
@@ -315,7 +315,7 @@ static int ptdump_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump);
-static void build_pgtable_complete_mask(void)
+static void __init build_pgtable_complete_mask(void)
{
unsigned int i, j;
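
To see why the two ptdump changes above go together (a sketch under assumptions, not the kernel function itself): dump_addr() now passes raw byte counts, so pt_dump_size() needs the leading space in units[] so that sub-KiB values print with no suffix and the first 1024x reduction lands on 'K'. The loop below is a standalone illustration of that unit selection; show_size() is an invented name.

#include <stdio.h>

/* Illustrative only: scale a byte count and pick a suffix from " KMGTPE",
 * where index 0 (a space) stands for plain bytes. */
static void show_size(unsigned long size)
{
	static const char units[] = " KMGTPE";
	const char *unit = units;

	while (size >= 1024 && unit[1]) {
		size >>= 10;
		unit++;
	}
	printf("%lu%c\n", size, *unit);
}

int main(void)
{
	show_size(512);    /* prints "512 " - bytes, no unit letter */
	show_size(81920);  /* prints "80K" */
	return 0;
}
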
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 82b45b1cb973..f42711f865f3 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -639,26 +639,6 @@ return_addr:
}
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
-unsigned long arch_get_unmapped_area(struct file *filp,
- unsigned long addr,
- unsigned long len,
- unsigned long pgoff,
- unsigned long flags)
-{
- return slice_get_unmapped_area(addr, len, flags,
- mm_ctx_user_psize(&current->mm->context), 0);
-}
-
-unsigned long arch_get_unmapped_area_topdown(struct file *filp,
- const unsigned long addr0,
- const unsigned long len,
- const unsigned long pgoff,
- const unsigned long flags)
-{
- return slice_get_unmapped_area(addr0, len, flags,
- mm_ctx_user_psize(&current->mm->context), 1);
-}
-
unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
unsigned char *psizes;
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 7e9b978b768e..b20a2a83a6e7 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -31,7 +31,7 @@
pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
return -ERANGE; \
} \
- EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \
+ EMIT(PPC_RAW_BRANCH(offset)); \
} while (0)
/* blr; (unconditional 'branch' with link) to absolute address */
@@ -125,8 +125,7 @@
#define COND_LE (CR0_GT | COND_CMP_FALSE)
#define SEEN_FUNC 0x20000000 /* might call external helpers */
-#define SEEN_STACK 0x40000000 /* uses BPF stack */
-#define SEEN_TAILCALL 0x80000000 /* uses tail calls */
+#define SEEN_TAILCALL 0x40000000 /* uses tail calls */
#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
@@ -151,8 +150,15 @@ struct codegen_context {
unsigned int idx;
unsigned int stack_size;
int b2p[ARRAY_SIZE(b2p)];
+ unsigned int exentry_idx;
};
+#ifdef CONFIG_PPC32
+#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */
+#else
+#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */
+#endif
+
static inline void bpf_flush_icache(void *start, void *end)
{
smp_wmb(); /* smp write barrier */
@@ -176,11 +182,14 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs, bool extra_pass);
+ u32 *addrs, int pass);
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
void bpf_jit_realloc_regs(struct codegen_context *ctx);
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+ int insn_idx, int jmp_off, int dst_reg);
+
#endif
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 90ce75f0f1e2..d6ffdd0f2309 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
bool extra_pass = false;
+ u32 extable_len;
+ u32 fixup_len;
if (!fp->jit_requested)
return org_fp;
@@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
image = jit_data->image;
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
extra_pass = true;
goto skip_init_ctx;
}
@@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
goto out_addrs;
@@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
*/
if (cgctx.seen & SEEN_TAILCALL) {
cgctx.idx = 0;
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
fp = org_fp;
goto out_addrs;
}
@@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_build_prologue(0, &cgctx);
bpf_jit_build_epilogue(0, &cgctx);
+ fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
+ extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
+
proglen = cgctx.idx * 4;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
+ alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
if (!bpf_hdr) {
@@ -186,6 +190,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
goto out_addrs;
}
+ if (extable_len)
+ fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+
skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
@@ -210,7 +217,7 @@ skip_init_ctx:
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
- if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) {
+ if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) {
bpf_jit_binary_free(bpf_hdr);
fp = org_fp;
goto out_addrs;
@@ -238,7 +245,7 @@ skip_codegen_passes:
fp->bpf_func = (void *)image;
fp->jited = 1;
- fp->jited_len = alloclen;
+ fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
if (!fp->is_func || extra_pass) {
@@ -262,3 +269,52 @@ out:
return fp;
}
+
+/*
+ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
+ * this function, as this only applies to BPF_PROBE_MEM, for now.
+ */
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+ int insn_idx, int jmp_off, int dst_reg)
+{
+ off_t offset;
+ unsigned long pc;
+ struct exception_table_entry *ex;
+ u32 *fixup;
+
+ /* Populate extable entries only in the last pass */
+ if (pass != 2)
+ return 0;
+
+ if (!fp->aux->extable ||
+ WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
+ return -EINVAL;
+
+ pc = (unsigned long)&image[insn_idx];
+
+ fixup = (void *)fp->aux->extable -
+ (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
+ (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
+
+ fixup[0] = PPC_RAW_LI(dst_reg, 0);
+ if (IS_ENABLED(CONFIG_PPC32))
+ fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
+
+ fixup[BPF_FIXUP_LEN - 1] =
+ PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
+
+ ex = &fp->aux->extable[ctx->exentry_idx];
+
+ offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->insn = offset;
+
+ offset = (long)fixup - (long)&ex->fixup;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->fixup = offset;
+
+ ctx->exentry_idx++;
+ return 0;
+}
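
A hedged sketch of how the relative offsets written above are consumed (not part of the patch): with powerpc's relative exception tables, the fault path recovers absolute addresses by adding each stored offset back to the address of its own field, mirroring the subtractions done in bpf_add_extable_entry().

/* Sketch only: decoding the pc-relative encoding that
 * bpf_add_extable_entry() stores into ex->insn and ex->fixup. */
#include <asm/extable.h>

static unsigned long ex_insn_addr(const struct exception_table_entry *ex)
{
	return (unsigned long)&ex->insn + ex->insn;
}

static unsigned long ex_fixup_addr(const struct exception_table_entry *ex)
{
	return (unsigned long)&ex->fixup + ex->fixup;
}
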
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 8a4faa05f9e4..faaebd446cad 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -268,7 +268,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs, bool extra_pass)
+ u32 *addrs, int pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -284,6 +284,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
u32 src_reg_h = src_reg - 1;
u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
+ u32 size = BPF_SIZE(code);
s16 off = insn[i].off;
s32 imm = insn[i].imm;
bool func_addr_fixed;
@@ -812,23 +813,91 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
* BPF_LDX
*/
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
- EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
- if (!fp->aux->verifier_zext)
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
- EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
- if (!fp->aux->verifier_zext)
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
- if (!fp->aux->verifier_zext)
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
- EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /*
+ * Since a PTR_TO_BTF_ID pointer accessed in BPF_PROBE_MEM mode can only be a
+ * valid kernel pointer or NULL, never a userspace address, perform the
+ * BPF_PROBE_MEM load only if the address is a kernel address (see
+ * is_kernel_addr()); otherwise set dst_reg = 0 and move on.
+ */
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ PPC_LI32(_R0, TASK_SIZE - off);
+ EMIT(PPC_RAW_CMPLW(src_reg, _R0));
+ PPC_BCC(COND_GT, (ctx->idx + 5) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ /*
+ * For the BPF_DW case, "li reg_h,0" would be needed when
+ * !fp->aux->verifier_zext. Emit a NOP otherwise.
+ *
+ * Note that "li reg_h,0" is emitted for the BPF_B/H/W cases,
+ * if necessary. So, jump there instead of emitting an
+ * additional "li reg_h,0" instruction.
+ */
+ if (size == BPF_DW && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ else
+ EMIT(PPC_RAW_NOP());
+ /*
+ * Need to jump two instructions instead of one for BPF_DW case
+ * as there are two load instructions for dst_reg_h & dst_reg
+ * respectively.
+ */
+ if (size == BPF_DW)
+ PPC_JMP((ctx->idx + 3) * 4);
+ else
+ PPC_JMP((ctx->idx + 2) * 4);
+ }
+
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ break;
+ }
+
+ if (size != BPF_DW && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ int insn_idx = ctx->idx - 1;
+ int jmp_off = 4;
+
+ /*
+ * In case of BPF_DW, two lwz instructions are emitted, one
+ * for the higher 32 bits and another for the lower 32 bits.
+ * So, set ex->insn to the first of the two and jump over both
+ * instructions in fixup.
+ *
+ * Similarly, with !verifier_zext, two instructions are
+ * emitted for the BPF_B/H/W cases. So, set ex->insn to the
+ * instruction that could fault and skip over both
+ * instructions.
+ */
+ if (size == BPF_DW || !fp->aux->verifier_zext) {
+ insn_idx -= 1;
+ jmp_off += 4;
+ }
+
+ ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
+ jmp_off, dst_reg);
+ if (ret)
+ return ret;
+ }
break;
/*
@@ -862,7 +931,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
- ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ ret = bpf_jit_get_func_addr(fp, &insn[i], false,
&func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
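
For the 32-bit guard above, a minimal C equivalent of the emitted runtime check may help: TASK_SIZE - off is precomputed at JIT time so only a cmplw is needed before the load. Purely illustrative; the names are not from the kernel.

static unsigned int probe_mem_load_w_sketch(unsigned long src, long off,
                                            unsigned long task_size)
{
    /* cmplw src, (TASK_SIZE - off): take the load only if src + off > TASK_SIZE */
    if (src <= task_size - off)
        return 0;                                   /* NULL/userspace: dst = 0 */
    return *(volatile unsigned int *)(src + off);   /* kernel address: the real lwz */
}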
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8571aafcc9e1..9eae8d8ed340 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -297,7 +297,7 @@ asm (
/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs, bool extra_pass)
+ u32 *addrs, int pass)
{
enum stf_barrier_type stf_barrier = stf_barrier_type_get();
const struct bpf_insn *insn = fp->insnsi;
@@ -311,6 +311,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u32 code = insn[i].code;
u32 dst_reg = b2p[insn[i].dst_reg];
u32 src_reg = b2p[insn[i].src_reg];
+ u32 size = BPF_SIZE(code);
s16 off = insn[i].off;
s32 imm = insn[i].imm;
bool func_addr_fixed;
@@ -778,25 +779,66 @@ emit_clear:
*/
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
- EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
- PPC_BPF_LL(dst_reg, src_reg, off);
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /*
+ * Since a PTR_TO_BTF_ID pointer accessed in BPF_PROBE_MEM mode can only be a
+ * valid kernel pointer or NULL, never a userspace address, perform the
+ * BPF_PROBE_MEM load only if the address is a kernel address (see
+ * is_kernel_addr()); otherwise set dst_reg = 0 and move on.
+ */
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off));
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64))
+ PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul);
+ else /* BOOK3S_64 */
+ PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET);
+ EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2]));
+ PPC_BCC(COND_GT, (ctx->idx + 4) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ /*
+ * Check if 'off' is word aligned because PPC_BPF_LL()
+ * (BPF_DW case) generates two instructions if 'off' is not
+ * word-aligned and one instruction otherwise.
+ */
+ if (BPF_SIZE(code) == BPF_DW && (off & 3))
+ PPC_JMP((ctx->idx + 3) * 4);
+ else
+ PPC_JMP((ctx->idx + 2) * 4);
+ }
+
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ PPC_BPF_LL(dst_reg, src_reg, off);
+ break;
+ }
+
+ if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
+ 4, dst_reg);
+ if (ret)
+ return ret;
+ }
break;
/*
@@ -831,7 +873,7 @@ emit_clear:
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
- ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ ret = bpf_jit_get_func_addr(fp, &insn[i], false,
&func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
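
The 64-bit guard uses a different boundary per sub-architecture; a rough C model of the comparison follows (the PAGE_OFFSET value shown is an assumption for illustration, not taken from the patch).

static int probe_addr_is_kernel_sketch(unsigned long addr, int book3e_64)
{
    /* Book3E: top half of the address space; Book3S: the linear map base */
    unsigned long boundary = book3e_64 ? 0x8000000000000000UL
                                       : 0xc000000000000000UL; /* assumed PAGE_OFFSET */
    /* cmpld addr, boundary: the load is performed only when addr > boundary */
    return addr > boundary;
}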
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index f970d1510d3d..4738c4dbf567 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -153,7 +153,7 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
- struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
+ ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
mpc8xx_pmu_read(event);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 73e62e9b179b..a684901b6965 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -17,6 +17,7 @@
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/code-patching.h>
+#include <asm/hw_irq.h>
#include <asm/interrupt.h>
#ifdef CONFIG_PPC64
@@ -857,6 +858,19 @@ static void write_pmc(int idx, unsigned long val)
}
}
+static int any_pmc_overflown(struct cpu_hw_events *cpuhw)
+{
+ int i, idx;
+
+ for (i = 0; i < cpuhw->n_events; i++) {
+ idx = cpuhw->event[i]->hw.idx;
+ if ((idx) && ((int)read_pmc(idx) < 0))
+ return idx;
+ }
+
+ return 0;
+}
+
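
The sign-bit test works because of how the counters are armed; a small sketch of the assumed convention (illustrative only, helper names are not kernel code):

static unsigned long arm_pmc_sketch(unsigned long period)
{
    return 0x80000000UL - period;   /* counter counts up; MSB set after 'period' events */
}

static int pmc_overflowed_sketch(unsigned long pmc_val)
{
    return (int)pmc_val < 0;        /* MSB set => period elapsed, a PMI is due */
}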
/* Called from sysrq_handle_showregs() */
void perf_event_print_debug(void)
{
@@ -1281,11 +1295,13 @@ static void power_pmu_disable(struct pmu *pmu)
/*
* Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56
+ * Also clear PMXE to prevent PMIs from being triggered in some
+ * corner cases during PMU disable.
*/
val = mmcr0 = mfspr(SPRN_MMCR0);
val |= MMCR0_FC;
val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
- MMCR0_FC56);
+ MMCR0_PMXE | MMCR0_FC56);
/* Set mmcr0 PMCCEXT for p10 */
if (ppmu->flags & PPMU_ARCH_31)
val |= MMCR0_PMCCEXT;
@@ -1299,6 +1315,23 @@ static void power_pmu_disable(struct pmu *pmu)
mb();
isync();
+ /*
+ * Some corner cases could clear the PMU counter overflow
+ * while a masked PMI is pending. One such case is when
+ * a PMI happens during interrupt replay and perf counter
+ * values are cleared by PMU callbacks before replay.
+ *
+ * If any PMC corresponding to an active PMU event has
+ * overflowed, clear the pending PMI bit in the paca, since
+ * we are disabling the PMU now. Otherwise, warn if a PMI is
+ * pending but no counter is found to have overflowed.
+ */
+ if (any_pmc_overflown(cpuhw))
+ clear_pmi_irq_pending();
+ else
+ WARN_ON(pmi_irq_pending());
+
val = mmcra = cpuhw->mmcr.mmcra;
/*
@@ -1390,6 +1423,15 @@ static void power_pmu_enable(struct pmu *pmu)
* (possibly updated for removal of events).
*/
if (!cpuhw->n_added) {
+ /*
+ * If any active event has an overflowed PMC value, set
+ * PACA_IRQ_PMI again, as it would have been cleared in
+ * power_pmu_disable().
+ */
+ hard_irq_disable();
+ if (any_pmc_overflown(cpuhw))
+ set_pmi_irq_pending();
+
mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
if (ppmu->flags & PPMU_ARCH_31)
@@ -2337,6 +2379,14 @@ static void __perf_event_interrupt(struct pt_regs *regs)
break;
}
}
+
+ /*
+ * Clear PACA_IRQ_PMI in case it was set by
+ * set_pmi_irq_pending() when the PMU was enabled
+ * after accounting for interrupts.
+ */
+ clear_pmi_irq_pending();
+
if (!active)
/* reset non active counters that have overflowed */
write_pmc(i + 1, 0);
@@ -2356,6 +2406,13 @@ static void __perf_event_interrupt(struct pt_regs *regs)
}
}
}
+
+ /*
+ * During system-wide profiling, or while a specific CPU is monitored for an
+ * event, some corner cases could cause a PMC to overflow in the idle path. This
+ * will trigger a PMI after waking up from idle. Since counter values are _not_
+ * saved/restored in the idle path, this can lead to the "Can't find PMC" message below.
+ */
if (unlikely(!found) && !arch_irq_disabled_regs(regs))
printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
@@ -2381,6 +2438,36 @@ static void perf_event_interrupt(struct pt_regs *regs)
perf_sample_event_took(sched_clock() - start_clock);
}
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers), which allows PMI interrupts
+ * through and improves the accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */
+
+ if (!ppmu)
+ return false;
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ return cpuhw->n_events;
+}
+
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -2392,7 +2479,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu)
return 0;
}
-int register_power_pmu(struct power_pmu *pmu)
+int __init register_power_pmu(struct power_pmu *pmu)
{
if (ppmu)
return -EBUSY; /* something's already registered */
@@ -2419,8 +2506,24 @@ int register_power_pmu(struct power_pmu *pmu)
}
#ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+ ppc_set_pmu_inuse(1);
+ if (pmu_override_val)
+ mtspr(SPRN_MMCR1, pmu_override_val);
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
static int __init init_ppc64_pmu(void)
{
+ if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+ pr_warn("disabling perf due to pmu_override= command line option.\n");
+ on_each_cpu(do_pmu_override, NULL, 1);
+ return 0;
+ }
+
/* run through all the pmu drivers one at a time */
if (!init_power5_pmu())
return 0;
@@ -2442,4 +2545,23 @@ static int __init init_ppc64_pmu(void)
return init_generic_compat_pmu();
}
early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+ unsigned long val;
+
+ if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+ return 0;
+
+ pmu_override = true;
+
+ if (kstrtoul(str, 0, &val))
+ val = 0;
+
+ pmu_override_val = val;
+
+ return 1;
+}
+__setup("pmu_override=", pmu_setup);
+
#endif
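
A usage sketch for the new pmu_override= early parameter: booting an HV-mode system with, say, pmu_override=0x1000000000000 (a hypothetical MMCR1 value) disables in-kernel perf, marks the PMU in use, programs MMCR1 and unfreezes the counters on every CPU, while an unparsable value still disables perf but leaves MMCR1 alone. Below is a userspace-style model of that parsing behaviour, not kernel code.

#include <stdbool.h>
#include <stdlib.h>

static bool pmu_override_sketch;
static unsigned long pmu_override_val_sketch;

static void parse_pmu_override_sketch(const char *arg)  /* e.g. "0x1000000000000" */
{
    char *end;
    unsigned long val = strtoul(arg, &end, 0);

    pmu_override_sketch = true;    /* option present: in-kernel perf stays disabled */
    /* a bad or empty value means MMCR1 is left untouched, as in pmu_setup() above */
    pmu_override_val_sketch = (end != arg && *end == '\0') ? val : 0;
}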
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
index 695975227e60..b6e25f75109d 100644
--- a/arch/powerpc/perf/generic-compat-pmu.c
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -307,7 +307,7 @@ static struct power_pmu generic_compat_pmu = {
.attr_groups = generic_compat_pmu_attr_groups,
};
-int init_generic_compat_pmu(void)
+int __init init_generic_compat_pmu(void)
{
int rc = 0;
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 1816f560a465..1e8aa934e37e 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -756,7 +756,7 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
}
if (calc_ev_end > ev_end) {
- pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
+ pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
event_idx, event, ev_end, offset, calc_ev_end);
return -1;
}
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
index 80bbf72bfec2..4c18b5504326 100644
--- a/arch/powerpc/perf/internal.h
+++ b/arch/powerpc/perf/internal.h
@@ -2,12 +2,12 @@
//
// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
-extern int init_ppc970_pmu(void);
-extern int init_power5_pmu(void);
-extern int init_power5p_pmu(void);
-extern int init_power6_pmu(void);
-extern int init_power7_pmu(void);
-extern int init_power8_pmu(void);
-extern int init_power9_pmu(void);
-extern int init_power10_pmu(void);
-extern int init_generic_compat_pmu(void);
+int __init init_ppc970_pmu(void);
+int __init init_power5_pmu(void);
+int __init init_power5p_pmu(void);
+int __init init_power6_pmu(void);
+int __init init_power7_pmu(void);
+int __init init_power8_pmu(void);
+int __init init_power9_pmu(void);
+int __init init_power10_pmu(void);
+int __init init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 7ea873ab2e6f..4037ea652522 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -220,22 +220,37 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
/* Nothing to do */
break;
case 1:
- ret = PH(LVL, L1);
+ ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT);
break;
case 2:
- ret = PH(LVL, L2);
+ ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
break;
case 3:
- ret = PH(LVL, L3);
+ ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
break;
case 4:
- if (sub_idx <= 1)
- ret = PH(LVL, LOC_RAM);
- else if (sub_idx > 1 && sub_idx <= 2)
- ret = PH(LVL, REM_RAM1);
- else
- ret = PH(LVL, REM_RAM2);
- ret |= P(SNOOP, HIT);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ ret = P(SNOOP, HIT);
+
+ if (sub_idx == 1)
+ ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
+ else if (sub_idx == 2 || sub_idx == 3)
+ ret |= P(LVL, HIT) | LEVEL(PMEM);
+ else if (sub_idx == 4)
+ ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
+ else if (sub_idx == 5 || sub_idx == 7)
+ ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
+ else if (sub_idx == 6)
+ ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
+ } else {
+ if (sub_idx <= 1)
+ ret = PH(LVL, LOC_RAM);
+ else if (sub_idx > 1 && sub_idx <= 2)
+ ret = PH(LVL, REM_RAM1);
+ else
+ ret = PH(LVL, REM_RAM2);
+ ret |= P(SNOOP, HIT);
+ }
break;
case 5:
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
@@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
}
break;
case 6:
- ret = PH(LVL, REM_CCE2);
- if ((sub_idx == 0) || (sub_idx == 2))
- ret |= P(SNOOP, HIT);
- else if ((sub_idx == 1) || (sub_idx == 3))
- ret |= P(SNOOP, HITM);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (sub_idx == 0)
+ ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HIT) | P(HOPS, 2);
+ else if (sub_idx == 1)
+ ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HITM) | P(HOPS, 2);
+ else if (sub_idx == 2)
+ ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HIT) | P(HOPS, 3);
+ else if (sub_idx == 3)
+ ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HITM) | P(HOPS, 3);
+ } else {
+ ret = PH(LVL, REM_CCE2);
+ if (sub_idx == 0 || sub_idx == 2)
+ ret |= P(SNOOP, HIT);
+ else if (sub_idx == 1 || sub_idx == 3)
+ ret |= P(SNOOP, HITM);
+ }
break;
case 7:
ret = PM(LVL, L1);
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 9dd75f385837..0975ad0b42c4 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -592,7 +592,7 @@ static struct power_pmu power10_pmu = {
.check_attr_config = power10_check_attr_config,
};
-int init_power10_pmu(void)
+int __init init_power10_pmu(void)
{
unsigned int pvr;
int rc;
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index 18732267993a..753b4740ef64 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = {
.cache_events = &power5p_cache_events,
};
-int init_power5p_pmu(void)
+int __init init_power5p_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index cb611c1e7abe..1f83c4cba0aa 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = {
.flags = PPMU_HAS_SSLOT,
};
-int init_power5_pmu(void)
+int __init init_power5_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 69ef38216418..aec746f86804 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -539,7 +539,7 @@ static struct power_pmu power6_pmu = {
.cache_events = &power6_cache_events,
};
-int init_power6_pmu(void)
+int __init init_power6_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 894c17f9a762..99b5ba314ea7 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -445,7 +445,7 @@ static struct power_pmu power7_pmu = {
.cache_events = &power7_cache_events,
};
-int init_power7_pmu(void)
+int __init init_power7_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 5282e8415ddf..f21194b5604a 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -378,7 +378,7 @@ static struct power_pmu power8_pmu = {
.bhrb_nr = 32,
};
-int init_power8_pmu(void)
+int __init init_power8_pmu(void)
{
int rc;
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index ff3382140d7e..4b7c17e36100 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -452,7 +452,7 @@ static struct power_pmu power9_pmu = {
.check_attr_config = power9_check_attr_config,
};
-int init_power9_pmu(void)
+int __init init_power9_pmu(void)
{
int rc = 0;
unsigned int pvr = mfspr(SPRN_PVR);
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 1f8263785286..09802482ba72 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -489,7 +489,7 @@ static struct power_pmu ppc970_pmu = {
.flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
-int init_ppc970_pmu(void)
+int __init init_ppc970_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
index 823397c802de..af13a59d2f60 100644
--- a/arch/powerpc/platforms/44x/fsp2.c
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -197,7 +197,7 @@ static irqreturn_t rst_wrn_handler(int irq, void *data) {
}
}
-static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
+static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler)
{
struct device_node *np;
unsigned int irq;
@@ -222,7 +222,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
}
}
-static void critical_irq_setup(void)
+static void __init critical_irq_setup(void)
{
node_irq_request(FSP2_CMU_ERR, cmu_err_handler);
node_irq_request(FSP2_BUS_ERR, bus_err_handler);
diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c
index ae8b812c9202..2571841625a2 100644
--- a/arch/powerpc/platforms/4xx/cpm.c
+++ b/arch/powerpc/platforms/4xx/cpm.c
@@ -163,7 +163,7 @@ static ssize_t cpm_idle_store(struct kobject *kobj,
static struct kobj_attribute cpm_idle_attr =
__ATTR(idle, 0644, cpm_idle_show, cpm_idle_store);
-static void cpm_idle_config_sysfs(void)
+static void __init cpm_idle_config_sysfs(void)
{
struct device *dev;
unsigned long ret;
@@ -231,7 +231,7 @@ static const struct platform_suspend_ops cpm_suspend_ops = {
.enter = cpm_suspend_enter,
};
-static int cpm_get_uint_property(struct device_node *np,
+static int __init cpm_get_uint_property(struct device_node *np,
const char *name)
{
int len;
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c
index c13d64c3b019..24f41e178cbc 100644
--- a/arch/powerpc/platforms/4xx/pci.c
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -1273,7 +1273,7 @@ static int __init ppc405ex_pciex_core_init(struct device_node *np)
return 2;
}
-static void ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port)
+static void __init ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port)
{
/* Assert the PE0_PHY reset */
mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01010000);
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index 30342b60aa63..0b03d812baae 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -97,7 +97,7 @@ static enum soc_type {
MPC512x_SOC_MPC5125,
} soc;
-static void mpc512x_clk_determine_soc(void)
+static void __init mpc512x_clk_determine_soc(void)
{
if (of_machine_is_compatible("fsl,mpc5121")) {
soc = MPC512x_SOC_MPC5121;
@@ -113,98 +113,98 @@ static void mpc512x_clk_determine_soc(void)
}
}
-static bool soc_has_mbx(void)
+static bool __init soc_has_mbx(void)
{
if (soc == MPC512x_SOC_MPC5121)
return true;
return false;
}
-static bool soc_has_axe(void)
+static bool __init soc_has_axe(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_viu(void)
+static bool __init soc_has_viu(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_spdif(void)
+static bool __init soc_has_spdif(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_pata(void)
+static bool __init soc_has_pata(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_sata(void)
+static bool __init soc_has_sata(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_pci(void)
+static bool __init soc_has_pci(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_fec2(void)
+static bool __init soc_has_fec2(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static int soc_max_pscnum(void)
+static int __init soc_max_pscnum(void)
{
if (soc == MPC512x_SOC_MPC5125)
return 10;
return 12;
}
-static bool soc_has_sdhc2(void)
+static bool __init soc_has_sdhc2(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_nfc_5125(void)
+static bool __init soc_has_nfc_5125(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_outclk(void)
+static bool __init soc_has_outclk(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_cpmf_0_bypass(void)
+static bool __init soc_has_cpmf_0_bypass(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_mclk_mux0_canin(void)
+static bool __init soc_has_mclk_mux0_canin(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
@@ -294,7 +294,7 @@ static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len)
}
/* get the SPMF and translate it into the "sys pll" multiplier */
-static int get_spmf_mult(void)
+static int __init get_spmf_mult(void)
{
static int spmf_to_mult[] = {
68, 1, 12, 16, 20, 24, 28, 32,
@@ -312,7 +312,7 @@ static int get_spmf_mult(void)
* values returned from here are a multiple of the real factor since the
* divide ratio is fractional
*/
-static int get_sys_div_x2(void)
+static int __init get_sys_div_x2(void)
{
static int sysdiv_code_to_x2[] = {
4, 5, 6, 7, 8, 9, 10, 14,
@@ -333,7 +333,7 @@ static int get_sys_div_x2(void)
* values returned from here are a multiple of the real factor since the
* multiplier ratio is fractional
*/
-static int get_cpmf_mult_x2(void)
+static int __init get_cpmf_mult_x2(void)
{
static int cpmf_to_mult_x36[] = {
/* 0b000 is "times 36" */
@@ -379,7 +379,7 @@ static const struct clk_div_table divtab_1234[] = {
{ .div = 0, },
};
-static int get_freq_from_dt(char *propname)
+static int __init get_freq_from_dt(char *propname)
{
struct device_node *np;
const unsigned int *prop;
@@ -396,7 +396,7 @@ static int get_freq_from_dt(char *propname)
return val;
}
-static void mpc512x_clk_preset_data(void)
+static void __init mpc512x_clk_preset_data(void)
{
size_t i;
@@ -418,7 +418,7 @@ static void mpc512x_clk_preset_data(void)
* SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div
* values
*/
-static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
+static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
int *sys_mul, int *sys_div,
int *ips_div)
{
@@ -592,7 +592,7 @@ static struct mclk_setup_data mclk_outclk_data[] = {
};
/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */
-static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
+static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
{
size_t clks_idx_pub, clks_idx_int;
u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */
@@ -701,7 +701,7 @@ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
/* }}} MCLK helpers */
-static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
+static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
{
int sys_mul, sys_div, ips_div;
int mul, div;
@@ -937,7 +937,7 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
* registers the set of public clocks (those listed in the dt-bindings/
* header file) for OF lookups, keeps the intermediates private to us
*/
-static void mpc5121_clk_register_of_provider(struct device_node *np)
+static void __init mpc5121_clk_register_of_provider(struct device_node *np)
{
clk_data.clks = clks;
clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1; /* _not_ ARRAY_SIZE() */
@@ -948,7 +948,7 @@ static void mpc5121_clk_register_of_provider(struct device_node *np)
* temporary support for the period of time between introduction of CCF
* support and the adjustment of peripheral drivers to OF based lookups
*/
-static void mpc5121_clk_provide_migration_support(void)
+static void __init mpc5121_clk_provide_migration_support(void)
{
/*
@@ -1009,7 +1009,7 @@ static void mpc5121_clk_provide_migration_support(void)
* case of not yet adjusted device tree data, where clock related specs
* are missing)
*/
-static void mpc5121_clk_provide_backwards_compat(void)
+static void __init mpc5121_clk_provide_backwards_compat(void)
{
enum did_reg_flags {
DID_REG_PSC = BIT(0),
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index fff225901e2f..2f3c60e373e1 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -12,8 +12,8 @@ extern void __init mpc512x_init_early(void);
extern void __init mpc512x_init(void);
extern void __init mpc512x_setup_arch(void);
extern int __init mpc5121_clk_init(void);
-extern const char *mpc512x_select_psc_compat(void);
-extern const char *mpc512x_select_reset_compat(void);
+const char *__init mpc512x_select_psc_compat(void);
+const char *__init mpc512x_select_reset_compat(void);
extern void __noreturn mpc512x_restart(char *cmd);
#endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 7a9ae9591d60..e3411663edad 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -352,7 +352,7 @@ static void __init mpc512x_declare_of_platform_devices(void)
#define DEFAULT_FIFO_SIZE 16
-const char *mpc512x_select_psc_compat(void)
+const char *__init mpc512x_select_psc_compat(void)
{
if (of_machine_is_compatible("fsl,mpc5121"))
return "fsl,mpc5121-psc";
@@ -363,7 +363,7 @@ const char *mpc512x_select_psc_compat(void)
return NULL;
}
-const char *mpc512x_select_reset_compat(void)
+const char *__init mpc512x_select_reset_compat(void)
{
if (of_machine_is_compatible("fsl,mpc5121"))
return "fsl,mpc5121-reset";
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 99d60acc20c8..b72ed2950ca8 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -34,7 +34,7 @@ config PPC_EFIKA
bool "bPlan Efika 5k2. MPC5200B based computer"
depends on PPC_MPC52xx
select PPC_RTAS
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
config PPC_LITE5200
bool "Freescale Lite5200 Eval Board"
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index 108e1e4d2683..d9eed0decb28 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -39,7 +39,7 @@
#define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */
-static void quirk_mpc8360e_qe_enet10(void)
+static void __init quirk_mpc8360e_qe_enet10(void)
{
/*
* handle mpc8360E Erratum QE_ENET10:
diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c
index 6d91bdce0a18..0713deffb40c 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c
@@ -35,7 +35,7 @@
#include "mpc83xx.h"
#define BCSR5_INT_USB 0x02
-static int mpc834xemds_usb_cfg(void)
+static int __init mpc834xemds_usb_cfg(void)
{
struct device_node *np;
void __iomem *bcsr_regs = NULL;
diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c
index f28d166ea7db..fc88ab97f6e3 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c
@@ -23,7 +23,7 @@
#define BCSR12_USB_SER_PIN 0x80
#define BCSR12_USB_SER_DEVICE 0x02
-static int mpc837xmds_usb_cfg(void)
+static int __init mpc837xmds_usb_cfg(void)
{
struct device_node *np;
const void *phy_type, *mode;
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index 7fb7684c256b..5d48c6842098 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -18,7 +18,7 @@
#include "mpc83xx.h"
-static void mpc837x_rdb_sd_cfg(void)
+static void __init mpc837x_rdb_sd_cfg(void)
{
void __iomem *im;
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index a30d30588cf6..aea803ba3a15 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -68,9 +68,9 @@
extern void __noreturn mpc83xx_restart(char *cmd);
extern long mpc83xx_time_init(void);
-extern int mpc837x_usb_cfg(void);
-extern int mpc834x_usb_cfg(void);
-extern int mpc831x_usb_cfg(void);
+int __init mpc837x_usb_cfg(void);
+int __init mpc834x_usb_cfg(void);
+int __init mpc831x_usb_cfg(void);
extern void mpc83xx_ipic_init_IRQ(void);
#ifdef CONFIG_PCI
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
index 3d247d726ed5..b0bda20aaccf 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -20,7 +20,7 @@
#ifdef CONFIG_PPC_MPC834x
-int mpc834x_usb_cfg(void)
+int __init mpc834x_usb_cfg(void)
{
unsigned long sccr, sicrl, sicrh;
void __iomem *immap;
@@ -96,7 +96,7 @@ int mpc834x_usb_cfg(void)
#endif /* CONFIG_PPC_MPC834x */
#ifdef CONFIG_PPC_MPC831x
-int mpc831x_usb_cfg(void)
+int __init mpc831x_usb_cfg(void)
{
u32 temp;
void __iomem *immap, *usb_regs;
@@ -209,7 +209,7 @@ out:
#endif /* CONFIG_PPC_MPC831x */
#ifdef CONFIG_PPC_MPC837x
-int mpc837x_usb_cfg(void)
+int __init mpc837x_usb_cfg(void)
{
void __iomem *immap;
struct device_node *np = NULL;
diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c
index 8d9a2503dd0f..58a398c89e97 100644
--- a/arch/powerpc/platforms/85xx/c293pcie.c
+++ b/arch/powerpc/platforms/85xx/c293pcie.c
@@ -19,7 +19,7 @@
#include "mpc85xx.h"
-void __init c293_pcie_pic_init(void)
+static void __init c293_pcie_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC ");
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index 83a0f7a1f0de..743c65e4d8e4 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -78,7 +78,7 @@ void __init ge_imp3a_pic_init(void)
of_node_put(cascade_node);
}
-static void ge_imp3a_pci_assign_primary(void)
+static void __init ge_imp3a_pci_assign_primary(void)
{
#ifdef CONFIG_PCI
struct device_node *np;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 172d2b7cfeb7..5bd487030256 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -282,7 +282,7 @@ machine_device_initcall(mpc85xx_cds, mpc85xx_cds_8259_attach);
#endif /* CONFIG_PPC_I8259 */
-static void mpc85xx_cds_pci_assign_primary(void)
+static void __init mpc85xx_cds_pci_assign_primary(void)
{
#ifdef CONFIG_PCI
struct device_node *np;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
index 4a8af80011a6..f7ac92a8ae97 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -15,6 +15,8 @@
#include <asm/io.h>
#include <asm/fsl_pm.h>
+#include "smp.h"
+
static struct ccsr_guts __iomem *guts;
#ifdef CONFIG_FSL_PMC
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index d7081e9af65c..a1c6a7827c8f 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -366,7 +366,7 @@ struct smp_ops_t smp_85xx_ops = {
#ifdef CONFIG_PPC32
atomic_t kexec_down_cpus = ATOMIC_INIT(0);
-void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
{
local_irq_disable();
@@ -384,7 +384,7 @@ static void mpc85xx_smp_kexec_down(void *arg)
ppc_md.kexec_cpu_down(0,1);
}
#else
-void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
{
int cpu = smp_processor_id();
int sibling = cpu_last_thread_sibling(cpu);
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 199a137c0ddb..3768c86b9629 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -271,7 +271,7 @@ static const struct irq_domain_ops socrates_fpga_pic_host_ops = {
.xlate = socrates_fpga_pic_host_xlate,
};
-void socrates_fpga_pic_init(struct device_node *pic)
+void __init socrates_fpga_pic_init(struct device_node *pic)
{
unsigned long flags;
int i;
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
index c592b8bc94dd..c50b23794a06 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
@@ -6,6 +6,6 @@
#ifndef SOCRATES_FPGA_PIC_H
#define SOCRATES_FPGA_PIC_H
-void socrates_fpga_pic_init(struct device_node *pic);
+void __init socrates_fpga_pic_init(struct device_node *pic);
#endif
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index d54e1ae56997..397e158c1edb 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -45,7 +45,7 @@ void __init xes_mpc85xx_pic_init(void)
mpic_init(mpic);
}
-static void xes_mpc85xx_configure_l2(void __iomem *l2_base)
+static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base)
{
volatile uint32_t ctl, tmp;
@@ -72,7 +72,7 @@ static void xes_mpc85xx_configure_l2(void __iomem *l2_base)
asm volatile("msync; isync");
}
-static void xes_mpc85xx_fixups(void)
+static void __init xes_mpc85xx_fixups(void)
{
struct device_node *np;
int err;
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e02d29a9d12f..d41dad227de8 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -40,9 +40,9 @@ config EPAPR_PARAVIRT
In case of doubt, say Y
-config PPC_NATIVE
+config PPC_HASH_MMU_NATIVE
bool
- depends on PPC_BOOK3S_32 || PPC64
+ depends on PPC_BOOK3S
help
Support for running natively on the hardware, i.e. without
a hypervisor. This option is not user-selectable but should
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a208997ade88..87bc1929ee5a 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -30,8 +30,6 @@ config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
imply PPC_FPU
select PPC_HAVE_PMU_SUPPORT
- select PPC_HAVE_KUEP
- select PPC_HAVE_KUAP
select HAVE_ARCH_VMAP_STACK
config PPC_85xx
@@ -42,8 +40,7 @@ config PPC_8xx
bool "Freescale 8xx"
select ARCH_SUPPORTS_HUGETLBFS
select FSL_SOC
- select PPC_HAVE_KUEP
- select PPC_HAVE_KUAP
+ select PPC_KUEP
select HAVE_ARCH_VMAP_STACK
select HUGETLBFS
@@ -53,6 +50,7 @@ config 40x
select PPC_UDBG_16550
select 4xx_SOC
select HAVE_PCI
+ select PPC_KUEP if PPC_KUAP
config 44x
bool "AMCC 44x, 46x or 47x"
@@ -61,7 +59,7 @@ config 44x
select 4xx_SOC
select HAVE_PCI
select PHYS_64BIT
- select PPC_HAVE_KUEP
+ select PPC_KUEP
endchoice
@@ -105,9 +103,7 @@ config PPC_BOOK3S_64
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
select IRQ_WORK
- select PPC_MM_SLICES
- select PPC_HAVE_KUEP
- select PPC_HAVE_KUAP
+ select PPC_64S_HASH_MMU if !PPC_RADIX_MMU
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -130,11 +126,13 @@ choice
config GENERIC_CPU
bool "Generic (POWER4 and above)"
depends on PPC64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU if PPC_BOOK3S_64
config GENERIC_CPU
bool "Generic (POWER8 and above)"
depends on PPC64 && CPU_LITTLE_ENDIAN
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
config GENERIC_CPU
bool "Generic 32 bits powerpc"
@@ -143,24 +141,29 @@ config GENERIC_CPU
config CELL_CPU
bool "Cell Broadband Engine"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER5_CPU
bool "POWER5"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER6_CPU
bool "POWER6"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER7_CPU
bool "POWER7"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
config POWER8_CPU
bool "POWER8"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
config POWER9_CPU
bool "POWER9"
@@ -278,6 +281,11 @@ config BOOKE
depends on E500 || 44x || PPC_BOOK3E
default y
+config BOOKE_OR_40x
+ bool
+ depends on BOOKE || 40x
+ default y
+
config FSL_BOOKE
bool
depends on E500 && PPC32
@@ -290,6 +298,7 @@ config PPC_FSL_BOOK3E
select FSL_EMB_PERFMON
select PPC_SMP_MUXED_IPI
select PPC_DOORBELL
+ select PPC_KUEP
default y if FSL_BOOKE
config PTE_64BIT
@@ -364,6 +373,22 @@ config SPE
If in doubt, say Y here.
+config PPC_64S_HASH_MMU
+ bool "Hash MMU Support"
+ depends on PPC_BOOK3S_64
+ select PPC_MM_SLICES
+ default y
+ help
+ Enable support for the Power ISA Hash style MMU. This is implemented
+ by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The
+ OpenPOWER ISA does not mandate the hash MMU and some CPUs do not
+ implement it (e.g., Microwatt).
+
+ Note that POWER9 PowerVM platforms only support the hash
+ MMU. From POWER10, radix is also supported by PowerVM.
+
+ If you're unsure, say Y.
+
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
@@ -375,7 +400,8 @@ config PPC_RADIX_MMU
you can probably disable this.
config PPC_RADIX_MMU_DEFAULT
- bool "Default to using the Radix MMU when possible"
+ bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU
+ depends on PPC_BOOK3S_64
depends on PPC_RADIX_MMU
default y
help
@@ -387,24 +413,16 @@ config PPC_RADIX_MMU_DEFAULT
If you're unsure, say Y.
-config PPC_HAVE_KUEP
- bool
-
config PPC_KUEP
- bool "Kernel Userspace Execution Prevention"
- depends on PPC_HAVE_KUEP
- default y
+ bool "Kernel Userspace Execution Prevention" if !40x
+ default y if !40x
help
Enable support for Kernel Userspace Execution Prevention (KUEP)
If you're unsure, say Y.
-config PPC_HAVE_KUAP
- bool
-
config PPC_KUAP
bool "Kernel Userspace Access Protection"
- depends on PPC_HAVE_KUAP
default y
help
Enable support for Kernel Userspace Access Protection (KUAP)
@@ -413,7 +431,7 @@ config PPC_KUAP
config PPC_KUAP_DEBUG
bool "Extra debugging for Kernel Userspace Access Protection"
- depends on PPC_KUAP && (PPC_RADIX_MMU || PPC32)
+ depends on PPC_KUAP
help
Add extra debugging for Kernel Userspace Access Protection (KUAP)
If you're unsure, say N.
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index cb70c5f25bc6..34669b060f36 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
config PPC_CELL
+ select PPC_64S_HASH_MMU if PPC64
bool
config PPC_CELL_COMMON
@@ -8,7 +9,7 @@ config PPC_CELL_COMMON
select PPC_DCR_MMIO
select PPC_INDIRECT_PIO
select PPC_INDIRECT_MMIO
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_RTAS
select IRQ_EDGE_EOI_HANDLER
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index c2a0678d85db..1c4c53bec66c 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -165,7 +165,7 @@ u32 cbe_node_to_cpu(int node)
}
EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
-static struct device_node *cbe_get_be_node(int cpu_id)
+static struct device_node *__init cbe_get_be_node(int cpu_id)
{
struct device_node *np;
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index fa08699aedeb..25e726bf0172 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -253,7 +253,7 @@ static irqreturn_t ioc_interrupt(int irq, void *data)
return IRQ_HANDLED;
}
-static int cell_iommu_find_ioc(int nid, unsigned long *base)
+static int __init cell_iommu_find_ioc(int nid, unsigned long *base)
{
struct device_node *np;
struct resource r;
@@ -293,7 +293,7 @@ static int cell_iommu_find_ioc(int nid, unsigned long *base)
return -ENODEV;
}
-static void cell_iommu_setup_stab(struct cbe_iommu *iommu,
+static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu,
unsigned long dbase, unsigned long dsize,
unsigned long fbase, unsigned long fsize)
{
@@ -313,7 +313,7 @@ static void cell_iommu_setup_stab(struct cbe_iommu *iommu,
memset(iommu->stab, 0, stab_size);
}
-static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
+static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
unsigned long base, unsigned long size, unsigned long gap_base,
unsigned long gap_size, unsigned long page_shift)
{
@@ -373,7 +373,7 @@ static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
return ptab;
}
-static void cell_iommu_enable_hardware(struct cbe_iommu *iommu)
+static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu)
{
int ret;
unsigned long reg, xlate_base;
@@ -413,7 +413,7 @@ static void cell_iommu_enable_hardware(struct cbe_iommu *iommu)
out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
}
-static void cell_iommu_setup_hardware(struct cbe_iommu *iommu,
+static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu,
unsigned long base, unsigned long size)
{
cell_iommu_setup_stab(iommu, base, size, 0, 0);
@@ -858,7 +858,7 @@ static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask)
cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR;
}
-static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
+static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab,
unsigned long base_pte)
{
unsigned long segment, offset;
@@ -873,7 +873,7 @@ static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask);
}
-static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
struct device_node *np, unsigned long dbase, unsigned long dsize,
unsigned long fbase, unsigned long fsize)
{
@@ -977,6 +977,7 @@ static int __init cell_iommu_fixed_mapping_init(void)
if (hbase < dbase || (hend > (dbase + dsize))) {
pr_debug("iommu: hash window doesn't fit in"
"real DMA window\n");
+ of_node_put(np);
return -1;
}
}
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
index 5b9a7e9f144b..dff8d5e7ab82 100644
--- a/arch/powerpc/platforms/cell/pervasive.c
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -78,6 +78,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs)
switch (regs->msr & SRR1_WAKEMASK) {
case SRR1_WAKEDEC:
set_dec(1);
+ break;
case SRR1_WAKEEE:
/*
* Handle these when interrupts get re-enabled and we take
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index bc48234443b6..83cea9e7ee72 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -387,7 +387,7 @@ spu_irq_class_2(int irq, void *data)
return stat ? IRQ_HANDLED : IRQ_NONE;
}
-static int spu_request_irqs(struct spu *spu)
+static int __init spu_request_irqs(struct spu *spu)
{
int ret = 0;
@@ -540,7 +540,7 @@ void spu_remove_dev_attr_group(struct attribute_group *attrs)
}
EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);
-static int spu_create_dev(struct spu *spu)
+static int __init spu_create_dev(struct spu *spu)
{
int ret;
@@ -711,7 +711,7 @@ static void crash_kexec_stop_spus(void)
}
}
-static void crash_register_spus(struct list_head *list)
+static void __init crash_register_spus(struct list_head *list)
{
struct spu *spu;
int ret;
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 8e9ef65240c3..ddf8742f09a3 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -186,7 +186,7 @@ err:
return -EINVAL;
}
-static int spu_map_resource(struct spu *spu, int nr,
+static int __init spu_map_resource(struct spu *spu, int nr,
void __iomem** virt, unsigned long *phys)
{
struct device_node *np = spu->devnode;
@@ -361,7 +361,7 @@ static void disable_spu_by_master_run(struct spu_context *ctx)
static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 };
static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
-static struct spu *spu_lookup_reg(int node, u32 reg)
+static struct spu *__init spu_lookup_reg(int node, u32 reg)
{
struct spu *spu;
const u32 *spu_reg;
@@ -374,7 +374,7 @@ static struct spu *spu_lookup_reg(int node, u32 reg)
return NULL;
}
-static void init_affinity_qs20_harcoded(void)
+static void __init init_affinity_qs20_harcoded(void)
{
int node, i;
struct spu *last_spu, *spu;
@@ -396,7 +396,7 @@ static void init_affinity_qs20_harcoded(void)
}
}
-static int of_has_vicinity(void)
+static int __init of_has_vicinity(void)
{
struct device_node *dn;
@@ -409,7 +409,7 @@ static int of_has_vicinity(void)
return 0;
}
-static struct spu *devnode_spu(int cbe, struct device_node *dn)
+static struct spu *__init devnode_spu(int cbe, struct device_node *dn)
{
struct spu *spu;
@@ -419,7 +419,7 @@ static struct spu *devnode_spu(int cbe, struct device_node *dn)
return NULL;
}
-static struct spu *
+static struct spu * __init
neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
{
struct spu *spu;
@@ -440,7 +440,7 @@ neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
return NULL;
}
-static void init_affinity_node(int cbe)
+static void __init init_affinity_node(int cbe)
{
struct spu *spu, *last_spu;
struct device_node *vic_dn, *last_spu_dn;
@@ -494,7 +494,7 @@ static void init_affinity_node(int cbe)
}
}
-static void init_affinity_fw(void)
+static void __init init_affinity_fw(void)
{
int cbe;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index cb25acccd746..4c702192412f 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -648,7 +648,7 @@ static void spufs_exit_isolated_loader(void)
get_order(isolated_loader_size));
}
-static void
+static void __init
spufs_init_isolated_loader(void)
{
struct device_node *dn;
diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig
index 9b5c5505718a..ff30ed579a39 100644
--- a/arch/powerpc/platforms/chrp/Kconfig
+++ b/arch/powerpc/platforms/chrp/Kconfig
@@ -11,6 +11,6 @@ config PPC_CHRP
select RTAS_ERROR_LOGGING
select PPC_MPC106
select PPC_UDBG_16550
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select FORCE_PCI
default y
diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c
index 485cf5ef73d4..5c4f1a9ca154 100644
--- a/arch/powerpc/platforms/chrp/pegasos_eth.c
+++ b/arch/powerpc/platforms/chrp/pegasos_eth.c
@@ -113,7 +113,7 @@ static struct platform_device *mv643xx_eth_pd_devs[] __initdata = {
static void __iomem *mv643xx_reg_base;
-static int Enable_SRAM(void)
+static int __init Enable_SRAM(void)
{
u32 ALong;
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index 4c6d703a4284..c54786f8461e 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -55,7 +55,7 @@ config MVME5100
select FORCE_PCI
select PPC_INDIRECT_PCI
select PPC_I8259
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_UDBG_16550
help
This option enables support for the Motorola (now Emerson) MVME5100
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 15396333a90b..380b4285cce4 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -153,7 +153,7 @@ static void __hlwd_quiesce(void __iomem *io_base)
out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff);
}
-static struct irq_domain *hlwd_pic_init(struct device_node *np)
+static struct irq_domain *__init hlwd_pic_init(struct device_node *np)
{
struct irq_domain *irq_domain;
struct resource res;
@@ -197,7 +197,7 @@ unsigned int hlwd_pic_get_irq(void)
*
*/
-void hlwd_pic_probe(void)
+void __init hlwd_pic_probe(void)
{
struct irq_domain *host;
struct device_node *np;
@@ -214,6 +214,7 @@ void hlwd_pic_probe(void)
irq_set_chained_handler(cascade_virq,
hlwd_pic_irq_cascade);
hlwd_irq_host = host;
+ of_node_put(np);
break;
}
}
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h
index f18eeeef0815..c2fa42e191dc 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h
@@ -11,7 +11,7 @@
#define __HLWD_PIC_H
extern unsigned int hlwd_pic_get_irq(void);
-extern void hlwd_pic_probe(void);
+void __init hlwd_pic_probe(void);
extern void hlwd_quiesce(void);
#endif
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 7a85b117f7a4..07e71ba3e846 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -50,7 +50,7 @@ static int holly_exclude_device(struct pci_controller *hose, u_char bus,
return PCIBIOS_SUCCESSFUL;
}
-static void holly_remap_bridge(void)
+static void __init holly_remap_bridge(void)
{
u32 lut_val, lut_addr;
int i;
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
index ed45db70a781..5aea46566233 100644
--- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
@@ -194,7 +194,7 @@ static int ug_udbg_getc_poll(void)
/*
* Retrieves and prepares the virtual address needed to access the hardware.
*/
-static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np)
+static void __iomem *__init ug_udbg_setup_exi_io_base(struct device_node *np)
{
void __iomem *exi_io_base = NULL;
phys_addr_t paddr;
@@ -212,7 +212,7 @@ static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np)
/*
* Checks if a USB Gecko adapter is inserted in any memory card slot.
*/
-static void __iomem *ug_udbg_probe(void __iomem *exi_io_base)
+static void __iomem *__init ug_udbg_probe(void __iomem *exi_io_base)
{
int i;
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index a802ef957d63..f60ade584bb2 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -69,7 +69,7 @@ static void __noreturn wii_spin(void)
cpu_relax();
}
-static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible)
+static void __iomem *__init wii_ioremap_hw_regs(char *name, char *compatible)
{
void __iomem *hw_regs = NULL;
struct device_node *np;
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
index 86ae210bee9a..4c058cc57c90 100644
--- a/arch/powerpc/platforms/maple/Kconfig
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -9,7 +9,8 @@ config PPC_MAPLE
select GENERIC_TBSYNC
select PPC_UDBG_16550
select PPC_970_NAP
- select PPC_NATIVE
+ select PPC_64S_HASH_MMU
+ select PPC_HASH_MMU_NATIVE
select PPC_RTAS
select MMIO_NVRAM
select ATA_NONSTANDARD if ATA
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
index 8f6a81978461..5e320f49583a 100644
--- a/arch/powerpc/platforms/microwatt/Kconfig
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -5,7 +5,6 @@ config PPC_MICROWATT
select PPC_XICS
select PPC_ICS_NATIVE
select PPC_ICP_NATIVE
- select PPC_NATIVE
select PPC_UDBG_16550
select ARCH_RANDOM
help
diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c
index 3d8ee6eb7dad..7bc4d1cbfaf0 100644
--- a/arch/powerpc/platforms/microwatt/rng.c
+++ b/arch/powerpc/platforms/microwatt/rng.c
@@ -14,7 +14,7 @@
#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
-int microwatt_get_random_darn(unsigned long *v)
+static int microwatt_get_random_darn(unsigned long *v)
{
unsigned long val;
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
index c52731a7773f..85ae18ddd911 100644
--- a/arch/powerpc/platforms/pasemi/Kconfig
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -5,7 +5,8 @@ config PPC_PASEMI
select MPIC
select FORCE_PCI
select PPC_UDBG_16550
- select PPC_NATIVE
+ select PPC_64S_HASH_MMU
+ select PPC_HASH_MMU_NATIVE
select MPIC_BROKEN_REGREAD
help
This option enables support for PA Semi's PWRficient line
diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c
index e8af72282682..ea1e41451408 100644
--- a/arch/powerpc/platforms/pasemi/msi.c
+++ b/arch/powerpc/platforms/pasemi/msi.c
@@ -130,7 +130,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return 0;
}
-int mpic_pasemi_msi_init(struct mpic *mpic)
+int __init mpic_pasemi_msi_init(struct mpic *mpic)
{
int rc;
struct pci_controller *phb;
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
index 70b56048ed1b..3f277a200fd8 100644
--- a/arch/powerpc/platforms/pasemi/pasemi.h
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -7,7 +7,7 @@ extern void pas_pci_init(void);
extern void pas_pci_irq_fixup(struct pci_dev *dev);
extern void pas_pci_dma_dev_setup(struct pci_dev *dev);
-extern void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset);
+void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset);
extern void __init pasemi_map_registers(void);
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
index 8779b107d872..d4b922759d6e 100644
--- a/arch/powerpc/platforms/pasemi/pci.c
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -287,7 +287,7 @@ void __init pas_pci_init(void)
}
}
-void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
+void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
{
struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index 376797eb7894..f974bfe7fde1 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -212,7 +212,7 @@ static void sb600_8259_cascade(struct irq_desc *desc)
chip->irq_eoi(&desc->irq_data);
}
-static void nemo_init_IRQ(struct mpic *mpic)
+static void __init nemo_init_IRQ(struct mpic *mpic)
{
struct device_node *np;
int gpio_virq;
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index b97bf12801eb..130707ec9f99 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -6,7 +6,8 @@ config PPC_PMAC
select FORCE_PCI
select PPC_INDIRECT_PCI if PPC32
select PPC_MPC106 if PPC32
- select PPC_NATIVE
+ select PPC_64S_HASH_MMU if PPC64
+ select PPC_HASH_MMU_NATIVE
select ZONE_DMA if PPC32
default y
diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
index ced225415486..b8ae56e9f414 100644
--- a/arch/powerpc/platforms/powermac/cache.S
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -48,7 +48,7 @@ flush_disable_75x:
/* Stop DST streams */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -197,7 +197,7 @@ flush_disable_745x:
isync
/* Stop prefetch streams */
- DSSALL
+ PPC_DSSALL
sync
/* Disable L2 prefetching */
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 5c77b9a24c0e..e67c624f35a2 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1530,7 +1530,7 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
* This takes the second CPU off the bus on dual CPU machines
* running UP
*/
-void g5_phy_disable_cpu1(void)
+void __init g5_phy_disable_cpu1(void)
{
if (uninorth_maj == 3)
UN_OUT(U3_API_PHY_CONFIG_1, 0);
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index f77a59b5c2e1..df89d916236d 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host,
bus->close = kw_i2c_close;
bus->xfer = kw_i2c_xfer;
mutex_init(&bus->mutex);
+ lockdep_register_key(&bus->lock_key);
lockdep_set_class(&bus->mutex, &bus->lock_key);
if (controller == busnode)
bus->flags = pmac_i2c_multibus;
@@ -810,6 +811,7 @@ static void __init pmu_i2c_probe(void)
bus->hostdata = bus + 1;
bus->xfer = pmu_i2c_xfer;
mutex_init(&bus->mutex);
+ lockdep_register_key(&bus->lock_key);
lockdep_set_class(&bus->mutex, &bus->lock_key);
bus->flags = pmac_i2c_multibus;
list_add(&bus->link, &pmac_i2c_busses);
@@ -933,6 +935,7 @@ static void __init smu_i2c_probe(void)
bus->hostdata = bus + 1;
bus->xfer = smu_i2c_xfer;
mutex_init(&bus->mutex);
+ lockdep_register_key(&bus->lock_key);
lockdep_set_class(&bus->mutex, &bus->lock_key);
bus->flags = 0;
list_add(&bus->link, &pmac_i2c_busses);
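The three low_i2c.c hunks above register the per-bus lock_class_key before lockdep_set_class() uses it: a key living in dynamically allocated memory must be registered with lockdep first. A minimal sketch of that pattern, with hypothetical names:

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct example_bus {
	struct mutex		lock;
	struct lock_class_key	lock_key;	/* embedded in heap memory */
};

static struct example_bus *example_bus_alloc(void)
{
	struct example_bus *bus = kzalloc(sizeof(*bus), GFP_KERNEL);

	if (!bus)
		return NULL;

	mutex_init(&bus->lock);
	/* Register the dynamic key before using it as a lock class. */
	lockdep_register_key(&bus->lock_key);
	lockdep_set_class(&bus->lock, &bus->lock_key);
	/* If the bus were ever freed, lockdep_unregister_key(&bus->lock_key)
	 * would have to run first. */
	return bus;
}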
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index 853ccc4480e2..de8fcb607290 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -258,7 +258,7 @@ static u32 core99_calc_adler(u8 *buffer)
return (high << 16) | low;
}
-static u32 core99_check(u8* datas)
+static u32 __init core99_check(u8 *datas)
{
struct core99_header* hdr99 = (struct core99_header*)datas;
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index f5422506d4b0..9c2947a3edd5 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -93,7 +93,7 @@ static struct pmf_handlers macio_gpio_handlers = {
.delay = macio_do_delay,
};
-static void macio_gpio_init_one(struct macio_chip *macio)
+static void __init macio_gpio_init_one(struct macio_chip *macio)
{
struct device_node *gparent, *gp;
@@ -265,7 +265,7 @@ static struct pmf_handlers macio_mmio_handlers = {
.delay = macio_do_delay,
};
-static void macio_mmio_init_one(struct macio_chip *macio)
+static void __init macio_mmio_init_one(struct macio_chip *macio)
{
DBG("Installing MMIO functions for macio %pOF\n",
macio->of_node);
@@ -294,7 +294,7 @@ static struct pmf_handlers unin_mmio_handlers = {
.delay = macio_do_delay,
};
-static void uninorth_install_pfunc(void)
+static void __init uninorth_install_pfunc(void)
{
struct device_node *np;
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 4921bccf0376..bb0566633af5 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -18,6 +18,7 @@
#include <linux/interrupt.h>
#include <linux/syscore_ops.h>
#include <linux/adb.h>
+#include <linux/minmax.h>
#include <linux/pmu.h>
#include <asm/sections.h>
@@ -311,11 +312,8 @@ static void __init pmac_pic_probe_oldstyle(void)
/* Check ordering of master & slave */
if (of_device_is_compatible(master, "gatwick")) {
- struct device_node *tmp;
BUG_ON(slave == NULL);
- tmp = master;
- master = slave;
- slave = tmp;
+ swap(master, slave);
}
/* We found a slave */
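The pic.c hunk above replaces an open-coded three-line exchange with the swap() helper provided by <linux/minmax.h>. A minimal sketch with hypothetical names:

#include <linux/minmax.h>
#include <linux/of.h>

/* Equivalent to: tmp = *master; *master = *slave; *slave = tmp; */
static void example_order_master_slave(struct device_node **master,
				       struct device_node **slave)
{
	swap(*master, *slave);
}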
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 13e8a8a9841c..974d4b49867b 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -166,7 +166,7 @@ static void pmac_show_cpuinfo(struct seq_file *m)
}
#ifndef CONFIG_ADB_CUDA
-int find_via_cuda(void)
+int __init find_via_cuda(void)
{
struct device_node *dn = of_find_node_by_name(NULL, "via-cuda");
@@ -180,7 +180,7 @@ int find_via_cuda(void)
#endif
#ifndef CONFIG_ADB_PMU
-int find_via_pmu(void)
+int __init find_via_pmu(void)
{
struct device_node *dn = of_find_node_by_name(NULL, "via-pmu");
@@ -194,7 +194,7 @@ int find_via_pmu(void)
#endif
#ifndef CONFIG_PMAC_SMU
-int smu_init(void)
+int __init smu_init(void)
{
/* should check and warn if SMU is present */
return 0;
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 3256a316e884..da1efdc30d6c 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -186,7 +186,7 @@ static const struct irq_domain_ops psurge_host_ops = {
.map = psurge_host_map,
};
-static int psurge_secondary_ipi_init(void)
+static int __init psurge_secondary_ipi_init(void)
{
int rc = -ENOMEM;
@@ -875,7 +875,7 @@ static int smp_core99_cpu_online(unsigned int cpu)
static void __init smp_core99_bringup_done(void)
{
- extern void g5_phy_disable_cpu1(void);
+ extern void __init g5_phy_disable_cpu1(void);
/* Close i2c bus if it was used for tb sync */
if (pmac_tb_clock_chip_host)
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index f286bdfe8346..965827ac2e9c 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -62,7 +62,7 @@ static unsigned char scc_inittab[] = {
3, 0xc1, /* rx enable, 8 bits */
};
-void udbg_scc_init(int force_scc)
+void __init udbg_scc_init(int force_scc)
{
const u32 *reg;
unsigned long addr;
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 043eefbbdd28..161dfe024085 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -2,7 +2,7 @@
config PPC_POWERNV
depends on PPC64 && PPC_BOOK3S
bool "IBM PowerNV (Non-Virtualized) platform support"
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU
select PPC_XICS
select PPC_ICP_NATIVE
select PPC_XIVE_NATIVE
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index e3ffdc8e8567..9942289f379b 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -62,7 +62,7 @@ static bool deepest_stop_found;
static unsigned long power7_offline_type;
-static int pnv_save_sprs_for_deep_states(void)
+static int __init pnv_save_sprs_for_deep_states(void)
{
int cpu;
int rc;
@@ -146,9 +146,13 @@ EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
static void pnv_fastsleep_workaround_apply(void *info)
{
+ int cpu = smp_processor_id();
int rc;
int *err = info;
+ if (cpu_first_thread_sibling(cpu) != cpu)
+ return;
+
rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
OPAL_CONFIG_IDLE_APPLY);
if (rc)
@@ -175,7 +179,6 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
{
- cpumask_t primary_thread_mask;
int err;
u8 val;
@@ -200,10 +203,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
power7_fastsleep_workaround_exit = false;
cpus_read_lock();
- primary_thread_mask = cpu_online_cores_map();
- on_each_cpu_mask(&primary_thread_mask,
- pnv_fastsleep_workaround_apply,
- &err, 1);
+ on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
cpus_read_unlock();
if (err) {
pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
@@ -306,8 +306,8 @@ struct p7_sprs {
/* per thread SPRs that get lost in shallow states */
u64 amr;
u64 iamr;
- u64 amor;
u64 uamor;
+ /* amor is restored to constant ~0 */
};
static unsigned long power7_idle_insn(unsigned long type)
@@ -378,7 +378,6 @@ static unsigned long power7_idle_insn(unsigned long type)
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
sprs.amr = mfspr(SPRN_AMR);
sprs.iamr = mfspr(SPRN_IAMR);
- sprs.amor = mfspr(SPRN_AMOR);
sprs.uamor = mfspr(SPRN_UAMOR);
}
@@ -397,7 +396,7 @@ static unsigned long power7_idle_insn(unsigned long type)
*/
mtspr(SPRN_AMR, sprs.amr);
mtspr(SPRN_IAMR, sprs.iamr);
- mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_UAMOR, sprs.uamor);
}
}
@@ -492,12 +491,14 @@ subcore_woken:
mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* The SLB has to be restored here, but it sometimes still
* contains entries, so the __ variant must be used to prevent
* multi hits.
*/
__slb_restore_bolted_realmode();
+#endif
return srr1;
}
@@ -589,7 +590,6 @@ struct p9_sprs {
u64 purr;
u64 spurr;
u64 dscr;
- u64 wort;
u64 ciabr;
u64 mmcra;
@@ -687,7 +687,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
sprs.amr = mfspr(SPRN_AMR);
sprs.iamr = mfspr(SPRN_IAMR);
- sprs.amor = mfspr(SPRN_AMOR);
sprs.uamor = mfspr(SPRN_UAMOR);
srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
@@ -708,7 +707,7 @@ static unsigned long power9_idle_stop(unsigned long psscr)
*/
mtspr(SPRN_AMR, sprs.amr);
mtspr(SPRN_IAMR, sprs.iamr);
- mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_UAMOR, sprs.uamor);
/*
@@ -1124,7 +1123,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
* stop instruction
*/
-int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
+int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
{
int err = 0;
@@ -1318,7 +1317,7 @@ static void __init pnv_probe_idle_states(void)
* which is the number of cpuidle states discovered through device-tree.
*/
-static int pnv_parse_cpuidle_dt(void)
+static int __init pnv_parse_cpuidle_dt(void)
{
struct device_node *np;
int nr_idle_states, i;
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
index 5b9736bbc2aa..0331f1973f0e 100644
--- a/arch/powerpc/platforms/powernv/opal-core.c
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -89,7 +89,7 @@ static inline int is_opalcore_usable(void)
return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0;
}
-static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name,
+static Elf64_Word *__init append_elf64_note(Elf64_Word *buf, char *name,
u32 type, void *data,
size_t data_len)
{
@@ -108,7 +108,7 @@ static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name,
return buf;
}
-static void fill_prstatus(struct elf_prstatus *prstatus, int pir,
+static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir,
struct pt_regs *regs)
{
memset(prstatus, 0, sizeof(struct elf_prstatus));
@@ -134,7 +134,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, int pir,
}
}
-static Elf64_Word *auxv_to_elf64_notes(Elf64_Word *buf,
+static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf,
u64 opal_boot_entry)
{
Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf;
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 717d1d30ade5..410ed5b9de29 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -208,11 +208,12 @@ static struct attribute *dump_default_attrs[] = {
&ack_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(dump_default);
static struct kobj_type dump_ktype = {
.sysfs_ops = &dump_sysfs_ops,
.release = &dump_release,
- .default_attrs = dump_default_attrs,
+ .default_groups = dump_default_groups,
};
static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type)
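The opal-dump.c and opal-elog.c hunks convert the kobj_type from .default_attrs to .default_groups, with ATTRIBUTE_GROUPS() generating the group array from the existing attribute list. A minimal sketch of the conversion, with hypothetical names:

#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct kobject *kobj, struct kobj_attribute *attr,
			    char *buf)
{
	return sysfs_emit(buf, "ok\n");
}
static struct kobj_attribute example_attribute = __ATTR_RO(example);

static struct attribute *example_default_attrs[] = {
	&example_attribute.attr,
	NULL,
};
ATTRIBUTE_GROUPS(example_default);	/* emits example_default_groups[] */

static struct kobj_type example_ktype = {
	.sysfs_ops	= &kobj_sysfs_ops,
	.default_groups	= example_default_groups,
};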
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 5821b0fa8614..554fdd7f88b8 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -144,11 +144,12 @@ static struct attribute *elog_default_attrs[] = {
&ack_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(elog_default);
static struct kobj_type elog_ktype = {
.sysfs_ops = &elog_sysfs_ops,
.release = &elog_release,
- .default_attrs = elog_default_attrs,
+ .default_groups = elog_default_groups,
};
/* Maximum size of a single log on FSP is 16KB */
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
index 9a360ced663b..c8ad057c7221 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.c
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -112,7 +112,7 @@ static void opal_fadump_update_config(struct fw_dump *fadump_conf,
* This function is called in the capture kernel to get configuration details
* from metadata setup by the first kernel.
*/
-static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+static void __init opal_fadump_get_config(struct fw_dump *fadump_conf,
const struct opal_fadump_mem_struct *fdm)
{
unsigned long base, size, last_end, hole_size;
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 05d3832019b9..3fea5da6d1b3 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -200,13 +200,13 @@ static void disable_nest_pmu_counters(void)
static void disable_core_pmu_counters(void)
{
- cpumask_t cores_map;
int cpu, rc;
cpus_read_lock();
/* Disable the IMC Core functions */
- cores_map = cpu_online_cores_map();
- for_each_cpu(cpu, &cores_map) {
+ for_each_online_cpu(cpu) {
+ if (cpu_first_thread_sibling(cpu) != cpu)
+ continue;
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(cpu));
if (rc)
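The opal-imc.c hunk, like the idle.c change earlier, drops the on-stack cpu_online_cores_map() copy and instead filters the online-CPU iterator down to the first thread of each core. A minimal sketch with hypothetical names; the caller is assumed to hold cpus_read_lock():

#include <linux/cpumask.h>
#include <asm/cputhreads.h>

static void example_for_each_online_core(void (*fn)(int cpu))
{
	int cpu;

	for_each_online_cpu(cpu) {
		/* Act only on the first thread of each core, as above. */
		if (cpu_first_thread_sibling(cpu) != cpu)
			continue;
		fn(cpu);
	}
}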
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index 1e5d51db40f8..5390c888db16 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -396,6 +396,7 @@ void __init opal_lpc_init(void)
if (!of_get_property(np, "primary", NULL))
continue;
opal_lpc_chip_id = of_get_ibm_chip_id(np);
+ of_node_put(np);
break;
}
if (opal_lpc_chip_id < 0)
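The opal-lpc.c hunk adds the of_node_put() that an early break out of for_each_compatible_node() otherwise skips, since the iterator holds a reference on the node it hands out. A minimal sketch with hypothetical names:

#include <linux/of.h>

static int example_has_primary_node(const char *compat)
{
	struct device_node *np;
	int found = 0;

	for_each_compatible_node(np, NULL, compat) {
		if (!of_get_property(np, "primary", NULL))
			continue;
		found = 1;
		/* Breaking out bypasses the iterator's implicit put. */
		of_node_put(np);
		break;
	}
	return found;
}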
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index d3b6e135c18b..22d6efe17b0d 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -105,7 +105,7 @@ static struct bin_attribute opal_msglog_attr = {
.read = opal_msglog_read
};
-struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name)
+struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name)
{
u64 mcaddr;
struct memcons *mc;
@@ -133,7 +133,7 @@ out_err:
return NULL;
}
-u32 memcons_get_size(struct memcons *mc)
+u32 __init memcons_get_size(struct memcons *mc)
{
return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size);
}
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
index 2a3717fc24ea..db99ffcb7b82 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -53,7 +53,7 @@ static bool detect_epow(void)
}
/* Check for existing EPOW, DPO events */
-static bool poweroff_pending(void)
+static bool __init poweroff_pending(void)
{
int rc;
__be64 opal_dpo_timeout;
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index c16d44f6f1d1..64506b46e77b 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -129,7 +129,7 @@ out_token:
return ret;
}
-static void powercap_add_attr(int handle, const char *name,
+static void __init powercap_add_attr(int handle, const char *name,
struct powercap_attr *attr)
{
attr->handle = handle;
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 44d7dacb33a2..a9bcf9217e64 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -18,7 +18,7 @@
#include <asm/firmware.h>
#include <asm/machdep.h>
-static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
+static void __init opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
{
tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) +
bcd2bin((y_m_d >> 16) & 0xff)) - 1900;
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index f8ae1fb0c102..8fba7d25ae56 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -126,7 +126,7 @@ static void add_attr(int handle, struct sg_attr *attr, int index)
attr->attr.store = ops_info[index].store;
}
-static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
+static int __init add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
u32 handle)
{
int i, j;
@@ -144,7 +144,7 @@ static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
return sysfs_create_group(sg_kobj, &sg->sg);
}
-static int get_nr_attrs(const __be32 *ops, int len)
+static int __init get_nr_attrs(const __be32 *ops, int len)
{
int i, j;
int nr_attrs = 0;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index e9d18519e650..55a8fbfdb5b2 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -73,7 +73,7 @@ static struct task_struct *kopald_tsk;
static struct opal_msg *opal_msg;
static u32 opal_msg_size __ro_after_init;
-void opal_configure_cores(void)
+void __init opal_configure_cores(void)
{
u64 reinit_flags = 0;
@@ -779,7 +779,7 @@ out:
return !!recover_addr;
}
-static int opal_sysfs_init(void)
+static int __init opal_sysfs_init(void)
{
opal_kobj = kobject_create_and_add("opal", firmware_kobj);
if (!opal_kobj) {
@@ -937,7 +937,7 @@ static void __init opal_dump_region_init(void)
"rc = %d\n", rc);
}
-static void opal_pdev_init(const char *compatible)
+static void __init opal_pdev_init(const char *compatible)
{
struct device_node *np;
@@ -981,7 +981,7 @@ void opal_wake_poller(void)
wake_up_process(kopald_tsk);
}
-static void opal_init_heartbeat(void)
+static void __init opal_init_heartbeat(void)
{
/* Old firmware, we assume the HVC heartbeat is sufficient */
if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8913c86009d9..b722ac902269 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2265,7 +2265,7 @@ static const struct irq_domain_ops pnv_irq_domain_ops = {
.free = pnv_irq_domain_free,
};
-static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
+static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
{
struct pnv_phb *phb = hose->private_data;
struct irq_domain *parent = irq_get_default_host();
@@ -2298,7 +2298,7 @@ static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int co
return 0;
}
-static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
+static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
unsigned int count;
const __be32 *prop = of_get_property(phb->hose->dn,
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 11df4e16a1cc..e297bf4abfcb 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -39,7 +39,7 @@ bool cpu_core_split_required(void);
struct memcons;
ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count);
-u32 memcons_get_size(struct memcons *mc);
-struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name);
+u32 __init memcons_get_size(struct memcons *mc);
+struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name);
#endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 72c25295c1c2..b4386714494a 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -80,7 +80,7 @@ static int powernv_get_random_darn(unsigned long *v)
return 1;
}
-static int initialise_darn(void)
+static int __init initialise_darn(void)
{
unsigned long val;
int i;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index ad56a54ac9c5..105d889abd51 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -40,7 +40,7 @@
#include "powernv.h"
-static bool fw_feature_is(const char *state, const char *name,
+static bool __init fw_feature_is(const char *state, const char *name,
struct device_node *fw_features)
{
struct device_node *np;
@@ -55,7 +55,7 @@ static bool fw_feature_is(const char *state, const char *name,
return rc;
}
-static void init_fw_feat_flags(struct device_node *np)
+static void __init init_fw_feat_flags(struct device_node *np)
{
if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
@@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np)
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
-static void pnv_setup_security_mitigations(void)
+static void __init pnv_setup_security_mitigations(void)
{
struct device_node *np, *fw_features;
enum l1d_flush_type type;
@@ -123,10 +123,14 @@ static void pnv_setup_security_mitigations(void)
}
/*
- * If we are non-Power9 bare metal, we don't need to flush on kernel
- * entry or after user access: they fix a P9 specific vulnerability.
+ * The issues addressed by the entry and uaccess flush don't affect P7
+ * or P8, so on bare metal disable them explicitly in case firmware does
+ * not include the features to disable them. POWER9 and newer processors
+ * should have the appropriate firmware flags.
*/
- if (!pvr_version_is(PVR_POWER9)) {
+ if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) ||
+ pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) ||
+ pvr_version_is(PVR_POWER8)) {
security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
}
@@ -207,6 +211,7 @@ static void __init pnv_init(void)
#endif
add_preferred_console("hvc", 0, NULL);
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!radix_enabled()) {
size_t size = sizeof(struct slb_entry) * mmu_slb_size;
int i;
@@ -219,6 +224,7 @@ static void __init pnv_init(void)
cpu_to_node(i));
}
}
+#endif
}
static void __init pnv_init_IRQ(void)
diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c
index cba4f8f5b8d7..6b298010fd84 100644
--- a/arch/powerpc/platforms/ps3/gelic_udbg.c
+++ b/arch/powerpc/platforms/ps3/gelic_udbg.c
@@ -113,7 +113,7 @@ static int unmap_dma_mem(int bus_id, int dev_id, u64 bus_addr, size_t len)
return lv1_free_device_dma_region(bus_id, dev_id, real_bus_addr);
}
-static void gelic_debug_init(void)
+static void __init gelic_debug_init(void)
{
s64 result;
u64 v2;
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 9c44f335c0b9..5ce924611b94 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -41,7 +41,7 @@ enum {
PAGE_SHIFT_16M = 24U,
};
-static unsigned long make_page_sizes(unsigned long a, unsigned long b)
+static unsigned long __init make_page_sizes(unsigned long a, unsigned long b)
{
return (a << 56) | (b << 48);
}
@@ -215,7 +215,7 @@ notrace void ps3_mm_vas_destroy(void)
}
}
-static int ps3_mm_get_repository_highmem(struct mem_region *r)
+static int __init ps3_mm_get_repository_highmem(struct mem_region *r)
{
int result;
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index e8530371aed6..cb844e0add2b 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -501,7 +501,7 @@ static int db_set_64(struct os_area_db *db, const struct os_area_db_id *id,
return -1;
}
-static int db_get_64(const struct os_area_db *db,
+static int __init db_get_64(const struct os_area_db *db,
const struct os_area_db_id *id, uint64_t *value)
{
struct db_iterator i;
@@ -517,7 +517,7 @@ static int db_get_64(const struct os_area_db *db,
return -1;
}
-static int db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff)
+static int __init db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff)
{
return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff);
}
diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h
index 07bd39ef71ff..6beecdb0d51f 100644
--- a/arch/powerpc/platforms/ps3/platform.h
+++ b/arch/powerpc/platforms/ps3/platform.h
@@ -35,7 +35,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq);
/* smp */
-void smp_init_ps3(void);
+void __init smp_init_ps3(void);
#ifdef CONFIG_SMP
void ps3_smp_cleanup_cpu(int cpu);
#else
@@ -134,9 +134,9 @@ struct ps3_repository_device {
int ps3_repository_find_device(struct ps3_repository_device *repo);
int ps3_repository_find_device_by_id(struct ps3_repository_device *repo,
u64 bus_id, u64 dev_id);
-int ps3_repository_find_devices(enum ps3_bus_type bus_type,
+int __init ps3_repository_find_devices(enum ps3_bus_type bus_type,
int (*callback)(const struct ps3_repository_device *repo));
-int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
+int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
unsigned int *bus_index);
int ps3_repository_find_interrupt(const struct ps3_repository_device *repo,
enum ps3_interrupt_type intr_type, unsigned int *interrupt_id);
@@ -211,8 +211,8 @@ static inline int ps3_repository_delete_highmem_info(unsigned int region_index)
int ps3_repository_read_num_be(unsigned int *num_be);
int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id);
int ps3_repository_read_be_id(u64 node_id, u64 *be_id);
-int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq);
-int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq);
+int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq);
+int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq);
/* repository performance monitor info */
@@ -247,7 +247,7 @@ int ps3_repository_read_spu_resource_id(unsigned int res_index,
/* repository vuart info */
-int ps3_repository_read_vuart_av_port(unsigned int *port);
-int ps3_repository_read_vuart_sysmgr_port(unsigned int *port);
+int __init ps3_repository_read_vuart_av_port(unsigned int *port);
+int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port);
#endif
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
index 21712964e76f..205763061a2d 100644
--- a/arch/powerpc/platforms/ps3/repository.c
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -413,7 +413,7 @@ found_dev:
return 0;
}
-int ps3_repository_find_devices(enum ps3_bus_type bus_type,
+int __init ps3_repository_find_devices(enum ps3_bus_type bus_type,
int (*callback)(const struct ps3_repository_device *repo))
{
int result = 0;
@@ -455,7 +455,7 @@ int ps3_repository_find_devices(enum ps3_bus_type bus_type,
return result;
}
-int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
+int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
unsigned int *bus_index)
{
unsigned int i;
@@ -908,7 +908,7 @@ int ps3_repository_read_boot_dat_size(unsigned int *size)
return result;
}
-int ps3_repository_read_vuart_av_port(unsigned int *port)
+int __init ps3_repository_read_vuart_av_port(unsigned int *port)
{
int result;
u64 v1 = 0;
@@ -923,7 +923,7 @@ int ps3_repository_read_vuart_av_port(unsigned int *port)
return result;
}
-int ps3_repository_read_vuart_sysmgr_port(unsigned int *port)
+int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port)
{
int result;
u64 v1 = 0;
@@ -1005,7 +1005,7 @@ int ps3_repository_read_be_id(u64 node_id, u64 *be_id)
be_id, NULL);
}
-int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq)
+int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq)
{
return read_node(PS3_LPAR_ID_PME,
make_first_field("be", 0),
@@ -1015,7 +1015,7 @@ int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq)
tb_freq, NULL);
}
-int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq)
+int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq)
{
int result;
u64 node_id;
@@ -1178,7 +1178,7 @@ int ps3_repository_delete_highmem_info(unsigned int region_index)
#if defined(DEBUG)
-int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo)
+int __init ps3_repository_dump_resource_info(const struct ps3_repository_device *repo)
{
int result = 0;
unsigned int res_index;
@@ -1231,7 +1231,7 @@ int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo)
return result;
}
-static int dump_stor_dev_info(struct ps3_repository_device *repo)
+static int __init dump_stor_dev_info(struct ps3_repository_device *repo)
{
int result = 0;
unsigned int num_regions, region_index;
@@ -1279,7 +1279,7 @@ out:
return result;
}
-static int dump_device_info(struct ps3_repository_device *repo,
+static int __init dump_device_info(struct ps3_repository_device *repo,
unsigned int num_dev)
{
int result = 0;
@@ -1323,7 +1323,7 @@ static int dump_device_info(struct ps3_repository_device *repo,
return result;
}
-int ps3_repository_dump_bus_info(void)
+int __init ps3_repository_dump_bus_info(void)
{
int result = 0;
struct ps3_repository_device repo;
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 93b1e73b3529..85295756005a 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -112,7 +112,7 @@ static struct smp_ops_t ps3_smp_ops = {
.kick_cpu = smp_generic_kick_cpu,
};
-void smp_init_ps3(void)
+void __init smp_init_ps3(void)
{
DBG(" -> %s\n", __func__);
smp_ops = &ps3_smp_ops;
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 0c252478e556..4a2520ec6d7f 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -137,7 +137,7 @@ u64 ps3_get_spe_id(void *arg)
}
EXPORT_SYMBOL_GPL(ps3_get_spe_id);
-static unsigned long get_vas_id(void)
+static unsigned long __init get_vas_id(void)
{
u64 id;
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 2e57391e0778..f7fd91d153a4 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -17,7 +17,6 @@ config PPC_PSERIES
select PPC_RTAS_DAEMON
select RTAS_ERROR_LOGGING
select PPC_UDBG_16550
- select PPC_NATIVE
select PPC_DOORBELL
select HOTPLUG_CPU
select ARCH_RANDOM
@@ -61,10 +60,6 @@ config PSERIES_ENERGY
Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
-config SCANLOG
- tristate "Scanlog dump interface"
- depends on RTAS_PROC && PPC_PSERIES
-
config IO_EVENT_IRQ
bool "IO Event Interrupt support"
depends on PPC_PSERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 41d8aee98da4..ee60b59024b4 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -8,7 +8,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
firmware.o power.o dlpar.o mobility.o rng.o \
pci.o pci_dlpar.o eeh_pseries.o msi.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SCANLOG) += scanlog.o
obj-$(CONFIG_KEXEC_CORE) += kexec.o
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index be661e919c76..623dfe0d8e1c 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -8,7 +8,7 @@
#include "pseries.h"
-void request_event_sources_irqs(struct device_node *np,
+void __init request_event_sources_irqs(struct device_node *np,
irq_handler_t handler,
const char *name)
{
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 5ab44600c8d3..b81fc846d99c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -864,12 +864,13 @@ static int __init pseries_cpu_hotplug_init(void)
/* Processors can be added/removed only on LPAR */
if (firmware_has_feature(FW_FEATURE_LPAR)) {
for_each_node(node) {
- alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]);
+ if (!alloc_cpumask_var_node(&node_recorded_ids_map[node],
+ GFP_KERNEL, node))
+ return -ENOMEM;
/* Record ids of CPU added at boot time */
- cpumask_or(node_recorded_ids_map[node],
- node_recorded_ids_map[node],
- cpumask_of_node(node));
+ cpumask_copy(node_recorded_ids_map[node],
+ cpumask_of_node(node));
}
of_reconfig_notifier_register(&pseries_smp_nb);
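The hotplug-cpu.c hunk switches the per-node mask allocation to alloc_cpumask_var_node(); since that does not pre-zero the mask when CONFIG_CPUMASK_OFFSTACK=y, the boot-time CPU ids are copied in rather than OR-ed. A minimal sketch with hypothetical names:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/topology.h>

static cpumask_var_t example_node_mask;

static int __init example_record_node_cpus(int node)
{
	if (!alloc_cpumask_var_node(&example_node_mask, GFP_KERNEL, node))
		return -ENOMEM;

	/* The fresh mask's contents are undefined, so copy, don't OR. */
	cpumask_copy(example_node_mask, cpumask_of_node(node));
	return 0;
}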
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 8f998e55735b..4d991cf840d9 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1654,7 +1654,7 @@ static struct notifier_block iommu_reconfig_nb = {
};
/* These are called very early. */
-void iommu_init_early_pSeries(void)
+void __init iommu_init_early_pSeries(void)
{
if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
return;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3df6bdfea475..f8899d506ea4 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -58,6 +58,7 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* H_BLOCK_REMOVE supported block size for this page size in segment whose base
* page size is that page size.
@@ -66,6 +67,7 @@ EXPORT_SYMBOL(plpar_hcall_norets);
* page size.
*/
static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+#endif
/*
* Due to the involved complexity, and that the current hypervisor is only
@@ -689,7 +691,7 @@ void vpa_init(int cpu)
return;
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
@@ -702,7 +704,7 @@ void vpa_init(int cpu)
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
/*
* Register dispatch trace log, if one has been allocated.
@@ -712,6 +714,36 @@ void vpa_init(int cpu)
#ifdef CONFIG_PPC_BOOK3S_64
+static int __init pseries_lpar_register_process_table(unsigned long base,
+ unsigned long page_size, unsigned long table_size)
+{
+ long rc;
+ unsigned long flags = 0;
+
+ if (table_size)
+ flags |= PROC_TABLE_NEW;
+ if (radix_enabled()) {
+ flags |= PROC_TABLE_RADIX;
+ if (mmu_has_feature(MMU_FTR_GTSE))
+ flags |= PROC_TABLE_GTSE;
+ } else
+ flags |= PROC_TABLE_HPT_SLB;
+ for (;;) {
+ rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+ page_size, table_size);
+ if (!H_IS_LONG_BUSY(rc))
+ break;
+ mdelay(get_longbusy_msecs(rc));
+ }
+ if (rc != H_SUCCESS) {
+ pr_err("Failed to register process table (rc=%ld)\n", rc);
+ BUG();
+ }
+ return rc;
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
@@ -1680,34 +1712,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
return 0;
}
-static int pseries_lpar_register_process_table(unsigned long base,
- unsigned long page_size, unsigned long table_size)
-{
- long rc;
- unsigned long flags = 0;
-
- if (table_size)
- flags |= PROC_TABLE_NEW;
- if (radix_enabled()) {
- flags |= PROC_TABLE_RADIX;
- if (mmu_has_feature(MMU_FTR_GTSE))
- flags |= PROC_TABLE_GTSE;
- } else
- flags |= PROC_TABLE_HPT_SLB;
- for (;;) {
- rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
- page_size, table_size);
- if (!H_IS_LONG_BUSY(rc))
- break;
- mdelay(get_longbusy_msecs(rc));
- }
- if (rc != H_SUCCESS) {
- pr_err("Failed to register process table (rc=%ld)\n", rc);
- BUG();
- }
- return rc;
-}
-
void __init hpte_init_pseries(void)
{
mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
@@ -1730,9 +1734,10 @@ void __init hpte_init_pseries(void)
if (cpu_has_feature(CPU_FTR_ARCH_300))
pseries_lpar_register_process_table(0, 0, 0);
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
#ifdef CONFIG_PPC_RADIX_MMU
-void radix_init_pseries(void)
+void __init radix_init_pseries(void)
{
pr_info("Using radix MMU under hypervisor\n");
@@ -1932,7 +1937,8 @@ int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
return rc;
}
-static unsigned long vsid_unscramble(unsigned long vsid, int ssize)
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize)
{
unsigned long protovsid;
unsigned long va_bits = VA_BITS;
@@ -1992,6 +1998,7 @@ static int __init reserve_vrma_context_id(void)
return 0;
}
machine_device_initcall(pseries, reserve_vrma_context_id);
+#endif
#ifdef CONFIG_DEBUG_FS
/* debugfs file interface for vpa data */
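pseries_lpar_register_process_table(), moved above so it is available without the hash-MMU code, uses the usual retry loop for hypervisor calls that return a long-busy status. A minimal sketch of that loop, with hypothetical names:

#include <linux/delay.h>
#include <asm/hvcall.h>

static long example_hcall_with_retry(unsigned long opcode, unsigned long arg)
{
	long rc;

	for (;;) {
		rc = plpar_hcall_norets(opcode, arg);
		if (!H_IS_LONG_BUSY(rc))
			break;
		/* H_LONG_BUSY_* encodes a suggested wait before retrying. */
		mdelay(get_longbusy_msecs(rc));
	}
	return rc;
}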
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index f71eac74ea92..c7940fcfc911 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -531,8 +531,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
-#ifdef CONFIG_PPC_BOOK3S_64
- seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (!radix_enabled())
+ seq_printf(m, "slb_size=%d\n", mmu_slb_size);
#endif
parse_em_data(m);
maxmem_data(m);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 210a37a065fb..85033f392c78 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -451,11 +451,15 @@ static void prod_others(void)
static u16 clamp_slb_size(void)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
u16 prev = mmu_slb_size;
slb_set_size(SLB_MIN_SIZE);
return prev;
+#else
+ return 0;
+#endif
}
static int do_suspend(void)
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 3544778e06d0..56c9ef9052e9 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -11,7 +11,7 @@
struct device_node;
-extern void request_event_sources_irqs(struct device_node *np,
+void __init request_event_sources_irqs(struct device_node *np,
irq_handler_t handler, const char *name);
#include <linux/of.h>
@@ -113,6 +113,11 @@ int dlpar_workqueue_init(void);
extern u32 pseries_security_flavor;
void pseries_setup_security_mitigations(void);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
void pseries_lpar_read_hblkrm_characteristics(void);
+#else
+static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
+#endif
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 56092dccfdb8..74c9b1b5bc66 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -526,6 +526,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type)
disposition = RTAS_DISP_FULLY_RECOVERED;
break;
case MC_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* Store the old slb content in paca before flushing.
* Print this when we go to virtual mode.
@@ -538,6 +539,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
+#endif
break;
default:
break;
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
index f8f73b47b107..35f9cb602c30 100644
--- a/arch/powerpc/platforms/pseries/rtas-fadump.c
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -39,7 +39,7 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
* This function is called in the capture kernel to get configuration details
* setup in the first kernel and passed to the f/w.
*/
-static void rtas_fadump_get_config(struct fw_dump *fadump_conf,
+static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf,
const struct rtas_fadump_mem_struct *fdm)
{
fadump_conf->boot_mem_addr[0] =
@@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id)
return i;
}
-static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
{
int i;
@@ -272,7 +272,7 @@ static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val
regs->dsisr = (unsigned long)reg_val;
}
-static struct rtas_fadump_reg_entry*
+static struct rtas_fadump_reg_entry* __init
rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
struct pt_regs *regs)
{
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
deleted file mode 100644
index 2879c4f0ceb7..000000000000
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ /dev/null
@@ -1,195 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * c 2001 PPC 64 Team, IBM Corp
- *
- * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com>
- *
- * When ppc64 hardware fails the service processor dumps internal state
- * of the system. After a reboot the operating system can access a dump
- * of this data using this driver. A dump exists if the device-tree
- * /chosen/ibm,scan-log-data property exists.
- *
- * This driver exports /proc/powerpc/scan-log-dump which can be read.
- * The driver supports only sequential reads.
- *
- * The driver looks at a write to the driver for the single word "reset".
- * If given, the driver will reset the scanlog so the platform can free it.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <asm/rtas.h>
-#include <asm/prom.h>
-
-#define MODULE_VERS "1.0"
-#define MODULE_NAME "scanlog"
-
-/* Status returns from ibm,scan-log-dump */
-#define SCANLOG_COMPLETE 0
-#define SCANLOG_HWERROR -1
-#define SCANLOG_CONTINUE 1
-
-
-static unsigned int ibm_scan_log_dump; /* RTAS token */
-static unsigned int *scanlog_buffer; /* The data buffer */
-
-static ssize_t scanlog_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- unsigned int *data = scanlog_buffer;
- int status;
- unsigned long len, off;
- unsigned int wait_time;
-
- if (count > RTAS_DATA_BUF_SIZE)
- count = RTAS_DATA_BUF_SIZE;
-
- if (count < 1024) {
- /* This is the min supported by this RTAS call. Rather
- * than do all the buffering we insist the user code handle
- * larger reads. As long as cp works... :)
- */
- printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count);
- return -EINVAL;
- }
-
- if (!access_ok(buf, count))
- return -EFAULT;
-
- for (;;) {
- wait_time = 500; /* default wait if no data */
- spin_lock(&rtas_data_buf_lock);
- memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE);
- status = rtas_call(ibm_scan_log_dump, 2, 1, NULL,
- (u32) __pa(rtas_data_buf), (u32) count);
- memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
- spin_unlock(&rtas_data_buf_lock);
-
- pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \
- "data[2]=%x\n", status, data[0], data[1], data[2]);
- switch (status) {
- case SCANLOG_COMPLETE:
- pr_debug("scanlog: hit eof\n");
- return 0;
- case SCANLOG_HWERROR:
- pr_debug("scanlog: hardware error reading data\n");
- return -EIO;
- case SCANLOG_CONTINUE:
- /* We may or may not have data yet */
- len = data[1];
- off = data[2];
- if (len > 0) {
- if (copy_to_user(buf, ((char *)data)+off, len))
- return -EFAULT;
- return len;
- }
- /* Break to sleep default time */
- break;
- default:
- /* Assume extended busy */
- wait_time = rtas_busy_delay_time(status);
- if (!wait_time) {
- printk(KERN_ERR "scanlog: unknown error " \
- "from rtas: %d\n", status);
- return -EIO;
- }
- }
- /* Apparently no data yet. Wait and try again. */
- msleep_interruptible(wait_time);
- }
- /*NOTREACHED*/
-}
-
-static ssize_t scanlog_write(struct file * file, const char __user * buf,
- size_t count, loff_t *ppos)
-{
- char stkbuf[20];
- int status;
-
- if (count > 19) count = 19;
- if (copy_from_user (stkbuf, buf, count)) {
- return -EFAULT;
- }
- stkbuf[count] = 0;
-
- if (buf) {
- if (strncmp(stkbuf, "reset", 5) == 0) {
- pr_debug("scanlog: reset scanlog\n");
- status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
- pr_debug("scanlog: rtas returns %d\n", status);
- }
- }
- return count;
-}
-
-static int scanlog_open(struct inode * inode, struct file * file)
-{
- unsigned int *data = scanlog_buffer;
-
- if (data[0] != 0) {
- /* This imperfect test stops a second copy of the
- * data (or a reset while data is being copied)
- */
- return -EBUSY;
- }
-
- data[0] = 0; /* re-init so we restart the scan */
-
- return 0;
-}
-
-static int scanlog_release(struct inode * inode, struct file * file)
-{
- unsigned int *data = scanlog_buffer;
-
- data[0] = 0;
- return 0;
-}
-
-static const struct proc_ops scanlog_proc_ops = {
- .proc_read = scanlog_read,
- .proc_write = scanlog_write,
- .proc_open = scanlog_open,
- .proc_release = scanlog_release,
- .proc_lseek = noop_llseek,
-};
-
-static int __init scanlog_init(void)
-{
- struct proc_dir_entry *ent;
- int err = -ENOMEM;
-
- ibm_scan_log_dump = rtas_token("ibm,scan-log-dump");
- if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE)
- return -ENODEV;
-
- /* Ideally we could allocate a buffer < 4G */
- scanlog_buffer = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
- if (!scanlog_buffer)
- goto err;
-
- ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL,
- &scanlog_proc_ops);
- if (!ent)
- goto err;
- return 0;
-err:
- kfree(scanlog_buffer);
- return err;
-}
-
-static void __exit scanlog_cleanup(void)
-{
- remove_proc_entry("powerpc/rtas/scan-log-dump", NULL);
- kfree(scanlog_buffer);
-}
-
-module_init(scanlog_init);
-module_exit(scanlog_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8a62af5b9c24..83a04d967a59 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -112,7 +112,7 @@ static void __init fwnmi_init(void)
u8 *mce_data_buf;
unsigned int i;
int nr_cpus = num_possible_cpus();
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct slb_entry *slb_ptr;
size_t size;
#endif
@@ -152,7 +152,7 @@ static void __init fwnmi_init(void)
(RTAS_ERROR_LOG_MAX * i);
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!radix_enabled()) {
/* Allocate per cpu area to save old slb contents during MCE */
size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
@@ -447,7 +447,7 @@ void pseries_big_endian_exceptions(void)
panic("Could not enable big endian exceptions");
}
-void pseries_little_endian_exceptions(void)
+void __init pseries_little_endian_exceptions(void)
{
long rc;
@@ -801,7 +801,9 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_security_mitigations();
+#ifdef CONFIG_PPC_64S_HASH_MMU
pseries_lpar_read_hblkrm_characteristics();
+#endif
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
@@ -905,7 +907,7 @@ void pSeries_coalesce_init(void)
* fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
* handle that here. (Stolen from parse_system_parameter_string)
*/
-static void pSeries_cmo_feature_init(void)
+static void __init pSeries_cmo_feature_init(void)
{
char *ptr, *key, *value, *end;
int call_status;
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index b043e3936d21..d243ddc58827 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -151,8 +151,15 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
if (rc == H_SUCCESS)
return 0;
- pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
- hcall, rc, query_type, result);
+ /* H_FUNCTION means HV does not support VAS so don't print an error */
+ if (rc != H_FUNCTION) {
+ pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
+ (hcall == H_QUERY_VAS_CAPABILITIES) ?
+ "H_QUERY_VAS_CAPABILITIES" :
+ "H_QUERY_NX_CAPABILITIES",
+ rc, query_type, result);
+ }
+
return -EIO;
}
EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
@@ -482,7 +489,7 @@ EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
* Get the specific capabilities based on the feature type.
* Right now supports GZIP default and GZIP QoS capabilities.
*/
-static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
struct hv_vas_cop_feat_caps *hv_caps)
{
struct vas_cop_feat_caps *caps;
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index feafcb582e1b..c9f9be4ea26a 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1061,7 +1061,7 @@ static struct attribute *vio_bus_attrs[] = {
};
ATTRIBUTE_GROUPS(vio_bus);
-static void vio_cmo_sysfs_init(void)
+static void __init vio_cmo_sysfs_init(void)
{
vio_bus_type.dev_groups = vio_cmo_dev_groups;
vio_bus_type.bus_groups = vio_bus_groups;
@@ -1073,7 +1073,7 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
static void vio_cmo_bus_init(void) {}
-static void vio_cmo_sysfs_init(void) { }
+static void __init vio_cmo_sysfs_init(void) { }
#endif /* CONFIG_PPC_SMLPAR */
EXPORT_SYMBOL(vio_cmo_entitlement_update);
EXPORT_SYMBOL(vio_cmo_set_dev_desired);
@@ -1479,7 +1479,7 @@ EXPORT_SYMBOL(vio_register_device_node);
* Starting from the root node provided, register the device node for
* each child beneath the root.
*/
-static void vio_bus_scan_register_devices(char *root_name)
+static void __init vio_bus_scan_register_devices(char *root_name)
{
struct device_node *node_root, *node_child;
diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
index 68538b8329f7..3f130312b6e9 100644
--- a/arch/powerpc/sysdev/cpm2.c
+++ b/arch/powerpc/sysdev/cpm2.c
@@ -135,7 +135,7 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src)
}
EXPORT_SYMBOL(__cpm2_setbrg);
-int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
+int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
{
int ret = 0;
int shift;
@@ -265,7 +265,7 @@ int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
return ret;
}
-int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
+int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
{
int ret = 0;
int shift;
@@ -326,7 +326,7 @@ struct cpm2_ioports {
u32 res[3];
};
-void cpm2_set_pin(int port, int pin, int flags)
+void __init cpm2_set_pin(int port, int pin, int flags)
{
struct cpm2_ioports __iomem *iop =
(struct cpm2_ioports __iomem *)&cpm2_immr->im_ioport;
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 1d33b7a5ea83..be6b99b1b352 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -226,7 +226,7 @@ static void dart_free(struct iommu_table *tbl, long index, long npages)
dart_cache_sync(orig_dp, orig_npages);
}
-static void allocate_dart(void)
+static void __init allocate_dart(void)
{
unsigned long tmp;
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index 9a98bb212922..df06bb6b838f 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -58,7 +58,7 @@ static struct irq_chip fsl_mpic_err_chip = {
.irq_unmask = fsl_mpic_unmask_err,
};
-int mpic_setup_error_int(struct mpic *mpic, int intvec)
+int __init mpic_setup_error_int(struct mpic *mpic, int intvec)
{
int i;
@@ -121,7 +121,7 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data)
return IRQ_HANDLED;
}
-void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum)
+void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum)
{
unsigned int virq;
int ret;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index b8f76f3fd994..674f047b7820 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1106,7 +1106,7 @@ static const struct of_device_id pci_ids[] = {
struct device_node *fsl_pci_primary;
-void fsl_pci_assign_primary(void)
+void __init fsl_pci_assign_primary(void)
{
struct device_node *np;
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
index 1d7a41205695..cdbde2e0c96e 100644
--- a/arch/powerpc/sysdev/fsl_pci.h
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -120,7 +120,7 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose);
extern struct device_node *fsl_pci_primary;
#ifdef CONFIG_PCI
-void fsl_pci_assign_primary(void);
+void __init fsl_pci_assign_primary(void);
#else
static inline void fsl_pci_assign_primary(void) {}
#endif
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index dc1a151c63d7..3b1ae98e3ce9 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -208,7 +208,7 @@ static const struct irq_domain_ops i8259_host_ops = {
.xlate = i8259_host_xlate,
};
-struct irq_domain *i8259_get_host(void)
+struct irq_domain *__init i8259_get_host(void)
{
return i8259_host;
}
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 7638a50a7c38..3f10c9fc3b68 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -767,7 +767,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
return ipic;
}
-void ipic_set_default_priority(void)
+void __init ipic_set_default_priority(void)
{
ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT);
ipic_write(primary_ipic->regs, IPIC_SIPRR_B, IPIC_PRIORITY_DEFAULT);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 995fb2ada507..d5cb48b61bbd 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1323,8 +1323,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
psrc = of_get_property(mpic->node, "protected-sources", &psize);
if (psrc) {
/* Allocate a bitmap with one bit per interrupt */
- unsigned int mapsize = BITS_TO_LONGS(intvec_top + 1);
- mpic->protected = kcalloc(mapsize, sizeof(long), GFP_KERNEL);
+ mpic->protected = bitmap_zalloc(intvec_top + 1, GFP_KERNEL);
BUG_ON(mpic->protected == NULL);
for (i = 0; i < psize/sizeof(u32); i++) {
if (psrc[i] > intvec_top)
@@ -1840,7 +1839,7 @@ unsigned int mpic_get_mcirq(void)
}
#ifdef CONFIG_SMP
-void mpic_request_ipis(void)
+void __init mpic_request_ipis(void)
{
struct mpic *mpic = mpic_primary;
int i;
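
The mpic_alloc() hunk above replaces a hand-sized kcalloc() of BITS_TO_LONGS(intvec_top + 1) longs with bitmap_zalloc(), which requests the same zeroed storage by bit count. A userspace-style sketch of the sizing involved (helper names are illustrative, not the kernel macros):

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Roughly what bitmap_zalloc(nbits, GFP_KERNEL) boils down to: a zeroed
 * array of longs large enough to hold nbits bits. */
static unsigned long *bitmap_zalloc_sketch(unsigned int nbits)
{
	return calloc(BITS_TO_LONGS(nbits), sizeof(long));
}

int main(void)
{
	unsigned int intvec_top = 255;   /* made-up value for illustration */
	unsigned long *protected = bitmap_zalloc_sketch(intvec_top + 1);

	printf("%u bits -> %zu longs\n", intvec_top + 1,
	       (size_t)BITS_TO_LONGS(intvec_top + 1));
	free(protected);
	return 0;
}
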
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 73a31a429d46..bb460ff57a06 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -8,8 +8,8 @@
#ifdef CONFIG_PCI_MSI
extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq);
-extern int mpic_msi_init_allocator(struct mpic *mpic);
-extern int mpic_u3msi_init(struct mpic *mpic);
+int __init mpic_msi_init_allocator(struct mpic *mpic);
+int __init mpic_u3msi_init(struct mpic *mpic);
#else
static inline void mpic_msi_reserve_hwirq(struct mpic *mpic,
irq_hw_number_t hwirq)
@@ -24,7 +24,7 @@ static inline int mpic_u3msi_init(struct mpic *mpic)
#endif
#if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI)
-int mpic_pasemi_msi_init(struct mpic *mpic);
+int __init mpic_pasemi_msi_init(struct mpic *mpic);
#else
static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; }
#endif
@@ -37,8 +37,8 @@ extern void mpic_reset_core(int cpu);
#ifdef CONFIG_FSL_SOC
extern int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw);
-extern void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum);
-extern int mpic_setup_error_int(struct mpic *mpic, int intvec);
+void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum);
+int __init mpic_setup_error_int(struct mpic *mpic, int intvec);
#else
static inline int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw)
{
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
index 4695c04320ae..f412d6ad0b66 100644
--- a/arch/powerpc/sysdev/mpic_msi.c
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -24,7 +24,7 @@ void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq)
}
#ifdef CONFIG_MPIC_U3_HT_IRQS
-static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
+static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
{
irq_hw_number_t hwirq;
const struct irq_domain_ops *ops = mpic->irqhost->ops;
@@ -68,13 +68,13 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
return 0;
}
#else
-static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
+static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
{
return -1;
}
#endif
-int mpic_msi_init_allocator(struct mpic *mpic)
+int __init mpic_msi_init_allocator(struct mpic *mpic)
{
int rc;
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index a42a20280035..444e9ce42d0a 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -384,7 +384,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
}
EXPORT_SYMBOL(mpic_request_timer);
-static int timer_group_get_freq(struct device_node *np,
+static int __init timer_group_get_freq(struct device_node *np,
struct timer_group_priv *priv)
{
u32 div;
@@ -411,7 +411,7 @@ static int timer_group_get_freq(struct device_node *np,
return 0;
}
-static int timer_group_get_irq(struct device_node *np,
+static int __init timer_group_get_irq(struct device_node *np,
struct timer_group_priv *priv)
{
const u32 all_timer[] = { 0, TIMERS_PER_GROUP };
@@ -459,7 +459,7 @@ static int timer_group_get_irq(struct device_node *np,
return 0;
}
-static void timer_group_init(struct device_node *np)
+static void __init timer_group_init(struct device_node *np)
{
struct timer_group_priv *priv;
unsigned int i = 0;
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
index fa53b6d85ef9..3f4841dfefb5 100644
--- a/arch/powerpc/sysdev/mpic_u3msi.c
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -169,7 +169,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return 0;
}
-int mpic_u3msi_init(struct mpic *mpic)
+int __init mpic_u3msi_init(struct mpic *mpic)
{
int rc;
struct pci_controller *phb;
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index 4c4a6efd5e5f..9e13fb35ed5c 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -51,13 +51,12 @@ phys_addr_t get_csrbase(void)
}
return tsi108_csr_base;
}
+EXPORT_SYMBOL(get_csrbase);
u32 get_vir_csrbase(void)
{
return (u32) (ioremap(get_csrbase(), 0x10000));
}
-
-EXPORT_SYMBOL(get_csrbase);
EXPORT_SYMBOL(get_vir_csrbase);
static int __init tsi108_eth_of_init(void)
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 042bb38fa5c2..1070220f15d5 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -257,7 +257,7 @@ static void tsi108_pci_int_unmask(u_int irq)
mb();
}
-static void init_pci_source(void)
+static void __init init_pci_source(void)
{
tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL,
0x0000ff00);
diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c
index d38bbeed219b..5020044400dc 100644
--- a/arch/powerpc/sysdev/udbg_memcons.c
+++ b/arch/powerpc/sysdev/udbg_memcons.c
@@ -92,7 +92,7 @@ int memcons_getc(void)
return c;
}
-void udbg_init_memcons(void)
+void __init udbg_init_memcons(void)
{
udbg_putc = memcons_putc;
udbg_getc = memcons_getc;
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 6765d9e264a3..cf8db19a4f7d 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -162,7 +162,7 @@ static const struct icp_ops icp_hv_ops = {
#endif
};
-int icp_hv_init(void)
+int __init icp_hv_init(void)
{
struct device_node *np;
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 675d708863d5..bda4c32582d9 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -184,7 +184,7 @@ static const struct icp_ops icp_opal_ops = {
#endif
};
-int icp_opal_init(void)
+int __init icp_opal_init(void)
{
struct device_node *np;
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 244a727c6ba4..f3fb2a12124c 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -121,7 +121,7 @@ void xics_mask_unknown_vec(unsigned int vec)
#ifdef CONFIG_SMP
-static void xics_request_ipi(void)
+static void __init xics_request_ipi(void)
{
unsigned int ipi;
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 7b69299c2912..1ca5564bda9d 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -85,6 +85,16 @@ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
#define XIVE_INVALID_TARGET (-1)
/*
+ * Global toggle to switch on/off StoreEOI
+ */
+static bool xive_store_eoi = true;
+
+static bool xive_is_store_eoi(struct xive_irq_data *xd)
+{
+ return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi;
+}
+
+/*
* Read the next entry in a queue, return its content if it's valid
* or 0 if there is no new entry.
*
@@ -208,7 +218,7 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
{
u64 val;
- if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+ if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd))
offset |= XIVE_ESB_LD_ST_MO;
if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
@@ -227,6 +237,21 @@ static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
out_be64(xd->eoi_mmio + offset, data);
}
+#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS)
+static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size)
+{
+ u64 val = xive_esb_read(xd, XIVE_ESB_GET);
+
+ snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx",
+ xive_is_store_eoi(xd) ? 'S' : ' ',
+ xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
+ xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
+ val & XIVE_ESB_VAL_P ? 'P' : '-',
+ val & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ xd->trig_page, xd->eoi_page);
+}
+#endif
+
#ifdef CONFIG_XMON
static notrace void xive_dump_eq(const char *name, struct xive_q *q)
{
@@ -252,11 +277,10 @@ notrace void xmon_xive_do_dump(int cpu)
#ifdef CONFIG_SMP
{
- u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
+ char buffer[128];
- xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
+ xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer);
}
#endif
xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
@@ -291,15 +315,11 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
d = xive_get_irq_data(hw_irq);
if (d) {
- struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- u64 val = xive_esb_read(xd, XIVE_ESB_GET);
-
- xmon_printf("flags=%c%c%c PQ=%c%c",
- xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
- xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
- xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ char buffer[128];
+
+ xive_irq_data_dump(irq_data_get_irq_handler_data(d),
+ buffer, sizeof(buffer));
+ xmon_printf("%s", buffer);
}
xmon_printf("\n");
@@ -385,7 +405,7 @@ static void xive_do_source_eoi(struct xive_irq_data *xd)
xd->stale_p = false;
/* If the XIVE supports the new "store EOI" facility, use it */
- if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) {
+ if (xive_is_store_eoi(xd)) {
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
return;
}
@@ -451,6 +471,8 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
{
u64 val;
+ pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un");
+
/*
* If the interrupt had P set, it may be in a queue.
*
@@ -612,8 +634,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
xd->saved_p = false;
xd->stale_p = false;
- pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
- d->irq, hw_irq, d);
+
+ pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
/* Pick a target */
target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
@@ -654,8 +676,7 @@ static void xive_irq_shutdown(struct irq_data *d)
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
- pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n",
- d->irq, hw_irq, d);
+ pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
return;
@@ -679,7 +700,7 @@ static void xive_irq_unmask(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd);
+ pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
xive_do_source_set_mask(xd, false);
}
@@ -688,7 +709,7 @@ static void xive_irq_mask(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd);
+ pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
xive_do_source_set_mask(xd, true);
}
@@ -702,7 +723,7 @@ static int xive_irq_set_affinity(struct irq_data *d,
u32 target, old_target;
int rc = 0;
- pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq);
+ pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq);
/* Is this valid ? */
if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
@@ -975,7 +996,7 @@ EXPORT_SYMBOL_GPL(is_xive_irq);
void xive_cleanup_irq_data(struct xive_irq_data *xd)
{
- pr_debug("%s for HW %x\n", __func__, xd->hw_irq);
+ pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq);
if (xd->eoi_mmio) {
iounmap(xd->eoi_mmio);
@@ -1211,8 +1232,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
pr_err("Failed to map IPI CPU %d\n", cpu);
return -EIO;
}
- pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu,
- xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
+ pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu,
+ xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
/* Unmask it */
xive_do_source_set_mask(&xc->ipi_data, false);
@@ -1390,7 +1411,7 @@ static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
if (rc)
return rc;
- pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+ pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs);
for (i = 0; i < nr_irqs; i++) {
/* TODO: call xive_irq_domain_map() */
@@ -1504,7 +1525,7 @@ static void xive_setup_cpu(void)
#ifdef CONFIG_SMP
void xive_smp_setup_cpu(void)
{
- pr_devel("SMP setup CPU %d\n", smp_processor_id());
+ pr_debug("SMP setup CPU %d\n", smp_processor_id());
/* This will have already been done on the boot CPU */
if (smp_processor_id() != boot_cpuid)
@@ -1650,10 +1671,10 @@ bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops,
ppc_md.get_irq = xive_get_irq;
__xive_enabled = true;
- pr_devel("Initializing host..\n");
+ pr_debug("Initializing host..\n");
xive_init_host(np);
- pr_devel("Initializing boot CPU..\n");
+ pr_debug("Initializing boot CPU..\n");
/* Allocate per-CPU data and queues */
xive_prepare_cpu(smp_processor_id());
@@ -1691,36 +1712,36 @@ static int __init xive_off(char *arg)
}
__setup("xive=off", xive_off);
-static void xive_debug_show_cpu(struct seq_file *m, int cpu)
+static int __init xive_store_eoi_cmdline(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strncmp(arg, "off", 3) == 0) {
+ pr_info("StoreEOI disabled on kernel command line\n");
+ xive_store_eoi = false;
+ }
+ return 0;
+}
+__setup("xive.store-eoi=", xive_store_eoi_cmdline);
+
+#ifdef CONFIG_DEBUG_FS
+static void xive_debug_show_ipi(struct seq_file *m, int cpu)
{
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
- seq_printf(m, "CPU %d:", cpu);
+ seq_printf(m, "CPU %d: ", cpu);
if (xc) {
seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
#ifdef CONFIG_SMP
{
- u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
+ char buffer[128];
- seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
+ seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer);
}
#endif
- {
- struct xive_q *q = &xc->queue[xive_irq_priority];
- u32 i0, i1, idx;
-
- if (q->qpage) {
- idx = q->idx;
- i0 = be32_to_cpup(q->qpage + idx);
- idx = (idx + 1) & q->msk;
- i1 = be32_to_cpup(q->qpage + idx);
- seq_printf(m, "EQ idx=%d T=%d %08x %08x ...",
- q->idx, q->toggle, i0, i1);
- }
- }
}
seq_puts(m, "\n");
}
@@ -1732,8 +1753,7 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
u32 target;
u8 prio;
u32 lirq;
- struct xive_irq_data *xd;
- u64 val;
+ char buffer[128];
rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
if (rc) {
@@ -1744,43 +1764,101 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
hw_irq, target, prio, lirq);
- xd = irq_data_get_irq_handler_data(d);
- val = xive_esb_read(xd, XIVE_ESB_GET);
- seq_printf(m, "flags=%c%c%c PQ=%c%c",
- xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
- xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
- xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer));
+ seq_puts(m, buffer);
seq_puts(m, "\n");
}
-static int xive_core_debug_show(struct seq_file *m, void *private)
+static int xive_irq_debug_show(struct seq_file *m, void *private)
{
unsigned int i;
struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
+
+ if (d)
+ xive_debug_show_irq(m, d);
+ }
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_irq_debug);
+
+static int xive_ipi_debug_show(struct seq_file *m, void *private)
+{
int cpu;
if (xive_ops->debug_show)
xive_ops->debug_show(m, private);
for_each_possible_cpu(cpu)
- xive_debug_show_cpu(m, cpu);
+ xive_debug_show_ipi(m, cpu);
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug);
- for_each_irq_desc(i, desc) {
- struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
+static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio)
+{
+ int i;
- if (d)
- xive_debug_show_irq(m, d);
+ seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle);
+ if (q->qpage) {
+ for (i = 0; i < q->msk + 1; i++) {
+ if (!(i % 8))
+ seq_printf(m, "%05d ", i);
+ seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i),
+ (i + 1) % 8 ? " " : "\n");
+ }
}
+ seq_puts(m, "\n");
+}
+
+static int xive_eq_debug_show(struct seq_file *m, void *private)
+{
+ int cpu = (long)m->private;
+ struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+ if (xc)
+ xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority],
+ xive_irq_priority);
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(xive_core_debug);
+DEFINE_SHOW_ATTRIBUTE(xive_eq_debug);
+
+static void xive_core_debugfs_create(void)
+{
+ struct dentry *xive_dir;
+ struct dentry *xive_eq_dir;
+ long cpu;
+ char name[16];
+
+ xive_dir = debugfs_create_dir("xive", arch_debugfs_dir);
+ if (IS_ERR(xive_dir))
+ return;
+
+ debugfs_create_file("ipis", 0400, xive_dir,
+ NULL, &xive_ipi_debug_fops);
+ debugfs_create_file("interrupts", 0400, xive_dir,
+ NULL, &xive_irq_debug_fops);
+ xive_eq_dir = debugfs_create_dir("eqs", xive_dir);
+ for_each_possible_cpu(cpu) {
+ snprintf(name, sizeof(name), "cpu%ld", cpu);
+ debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu,
+ &xive_eq_debug_fops);
+ }
+ debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi);
+
+ if (xive_ops->debug_create)
+ xive_ops->debug_create(xive_dir);
+}
+#else
+static inline void xive_core_debugfs_create(void) { }
+#endif /* CONFIG_DEBUG_FS */
int xive_core_debug_init(void)
{
- if (xive_enabled())
- debugfs_create_file("xive", 0400, arch_debugfs_dir,
- NULL, &xive_core_debug_fops);
+ if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS))
+ xive_core_debugfs_create();
+
return 0;
}
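
The common.c changes above introduce a global xive_store_eoi toggle: StoreEOI is only used when the source advertises XIVE_IRQ_FLAG_STORE_EOI and the toggle is still set, and the toggle can be cleared via the xive.store-eoi=off command line option or the new store-eoi debugfs file. A minimal standalone sketch of that flag-plus-toggle check (plain userspace C with made-up flag values, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

#define IRQ_FLAG_STORE_EOI 0x1   /* hypothetical stand-in for XIVE_IRQ_FLAG_STORE_EOI */

struct irq_data_sketch {
	unsigned int flags;
};

static bool store_eoi_enabled = true;   /* models the global xive_store_eoi toggle */

/* Mirrors the shape of xive_is_store_eoi(): per-source capability AND global switch */
static bool is_store_eoi(const struct irq_data_sketch *xd)
{
	return (xd->flags & IRQ_FLAG_STORE_EOI) && store_eoi_enabled;
}

int main(void)
{
	struct irq_data_sketch xd = { .flags = IRQ_FLAG_STORE_EOI };

	printf("StoreEOI used: %d\n", is_store_eoi(&xd));   /* 1 */
	store_eoi_enabled = false;                          /* like xive.store-eoi=off */
	printf("StoreEOI used: %d\n", is_store_eoi(&xd));   /* 0 */
	return 0;
}
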
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1aec282cd650..f940428ad13f 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -41,7 +41,7 @@ static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache;
static bool xive_has_single_esc;
-static bool xive_has_save_restore;
+bool xive_has_save_restore;
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{
@@ -63,6 +63,8 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
opal_flags = be64_to_cpu(flags);
if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+ if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2)
+ data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
if (opal_flags & OPAL_XIVE_IRQ_LSI)
data->flags |= XIVE_IRQ_FLAG_LSI;
data->eoi_page = be64_to_cpu(eoi_page);
@@ -459,6 +461,14 @@ void xive_native_sync_queue(u32 hw_irq)
}
EXPORT_SYMBOL_GPL(xive_native_sync_queue);
+#ifdef CONFIG_DEBUG_FS
+static int xive_native_debug_create(struct dentry *xive_dir)
+{
+ debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore);
+ return 0;
+}
+#endif
+
static const struct xive_ops xive_native_ops = {
.populate_irq_data = xive_native_populate_irq_data,
.configure_irq = xive_native_configure_irq,
@@ -476,10 +486,13 @@ static const struct xive_ops xive_native_ops = {
.get_ipi = xive_native_get_ipi,
.put_ipi = xive_native_put_ipi,
#endif /* CONFIG_SMP */
+#ifdef CONFIG_DEBUG_FS
+ .debug_create = xive_native_debug_create,
+#endif /* CONFIG_DEBUG_FS */
.name = "native",
};
-static bool xive_parse_provisioning(struct device_node *np)
+static bool __init xive_parse_provisioning(struct device_node *np)
{
int rc;
@@ -519,7 +532,7 @@ static bool xive_parse_provisioning(struct device_node *np)
return true;
}
-static void xive_native_setup_pools(void)
+static void __init xive_native_setup_pools(void)
{
/* Allocate a pool big enough */
pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids);
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index f143b6f111ac..928f95004501 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -44,7 +44,7 @@ struct xive_irq_bitmap {
static LIST_HEAD(xive_irq_bitmaps);
-static int xive_irq_bitmap_add(int base, int count)
+static int __init xive_irq_bitmap_add(int base, int count)
{
struct xive_irq_bitmap *xibm;
@@ -173,7 +173,7 @@ static long plpar_int_get_source_info(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc);
+ pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc);
return rc;
}
@@ -182,8 +182,8 @@ static long plpar_int_get_source_info(unsigned long flags,
*trig_page = retbuf[2];
*esb_shift = retbuf[3];
- pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n",
- retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
+ pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n",
+ lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
return 0;
}
@@ -200,8 +200,8 @@ static long plpar_int_set_source_config(unsigned long flags,
long rc;
- pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n",
- flags, lisn, target, prio, sw_irq);
+ pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n",
+ flags, lisn, target, prio, sw_irq);
do {
@@ -210,7 +210,7 @@ static long plpar_int_set_source_config(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n",
+ pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n",
lisn, target, prio, rc);
return rc;
}
@@ -227,7 +227,7 @@ static long plpar_int_get_source_config(unsigned long flags,
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
long rc;
- pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn);
+ pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn);
do {
rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn,
@@ -235,7 +235,7 @@ static long plpar_int_get_source_config(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n",
+ pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n",
lisn, rc);
return rc;
}
@@ -244,8 +244,8 @@ static long plpar_int_get_source_config(unsigned long flags,
*prio = retbuf[1];
*sw_irq = retbuf[2];
- pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n",
- retbuf[0], retbuf[1], retbuf[2]);
+ pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n",
+ retbuf[0], retbuf[1], retbuf[2]);
return 0;
}
@@ -273,8 +273,8 @@ static long plpar_int_get_queue_info(unsigned long flags,
*esn_page = retbuf[0];
*esn_size = retbuf[1];
- pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n",
- retbuf[0], retbuf[1]);
+ pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n",
+ target, priority, retbuf[0], retbuf[1]);
return 0;
}
@@ -289,8 +289,8 @@ static long plpar_int_set_queue_config(unsigned long flags,
{
long rc;
- pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n",
- flags, target, priority, qpage, qsize);
+ pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n",
+ flags, target, priority, qpage, qsize);
do {
rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
@@ -298,7 +298,7 @@ static long plpar_int_set_queue_config(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n",
+ pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n",
target, priority, qpage, rc);
return rc;
}
@@ -315,7 +315,7 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn)
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc);
+ pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc);
return rc;
}
@@ -333,8 +333,8 @@ static long plpar_int_esb(unsigned long flags,
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
long rc;
- pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n",
- flags, lisn, offset, in_data);
+ pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n",
+ flags, lisn, offset, in_data);
do {
rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset,
@@ -342,7 +342,7 @@ static long plpar_int_esb(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n",
+ pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n",
lisn, offset, rc);
return rc;
}
@@ -653,6 +653,9 @@ static int xive_spapr_debug_show(struct seq_file *m, void *private)
struct xive_irq_bitmap *xibm;
char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
memset(buf, 0, PAGE_SIZE);
bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count);
@@ -687,7 +690,7 @@ static const struct xive_ops xive_spapr_ops = {
/*
* get max priority from "/ibm,plat-res-int-priorities"
*/
-static bool xive_get_max_prio(u8 *max_prio)
+static bool __init xive_get_max_prio(u8 *max_prio)
{
struct device_node *rootdn;
const __be32 *reg;
@@ -741,7 +744,7 @@ static bool xive_get_max_prio(u8 *max_prio)
return true;
}
-static const u8 *get_vec5_feature(unsigned int index)
+static const u8 *__init get_vec5_feature(unsigned int index)
{
unsigned long root, chosen;
int size;
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index 504e7edce358..fe6d95d54af9 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -58,6 +58,7 @@ struct xive_ops {
void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc);
#endif
int (*debug_show)(struct seq_file *m, void *private);
+ int (*debug_create)(struct dentry *xive_dir);
const char *name;
};
@@ -72,5 +73,6 @@ static inline u32 xive_alloc_order(u32 queue_shift)
}
extern bool xive_cmdline_disabled;
+extern bool xive_has_save_restore;
#endif /* __XIVE_INTERNAL_H */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 8b28ff9d98d1..fd72753e8ad5 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -125,7 +125,7 @@ static unsigned bpinstr = 0x7fe00008; /* trap */
static int cmds(struct pt_regs *);
static int mread(unsigned long, void *, int);
static int mwrite(unsigned long, void *, int);
-static int mread_instr(unsigned long, struct ppc_inst *);
+static int mread_instr(unsigned long, ppc_inst_t *);
static int handle_fault(struct pt_regs *);
static void byterev(unsigned char *, int);
static void memex(void);
@@ -908,7 +908,7 @@ static struct bpt *new_breakpoint(unsigned long a)
static void insert_bpts(void)
{
int i;
- struct ppc_inst instr, instr2;
+ ppc_inst_t instr, instr2;
struct bpt *bp, *bp2;
bp = bpts;
@@ -988,7 +988,7 @@ static void remove_bpts(void)
{
int i;
struct bpt *bp;
- struct ppc_inst instr;
+ ppc_inst_t instr;
bp = bpts;
for (i = 0; i < NBPTS; ++i, ++bp) {
@@ -1159,7 +1159,7 @@ cmds(struct pt_regs *excp)
case 'P':
show_tasks();
break;
-#ifdef CONFIG_PPC_BOOK3S
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_64S_HASH_MMU)
case 'u':
dump_segments();
break;
@@ -1204,7 +1204,7 @@ static int do_step(struct pt_regs *regs)
*/
static int do_step(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
int stepped;
force_enable_xmon();
@@ -1459,7 +1459,7 @@ csum(void)
*/
static long check_bp_loc(unsigned long addr)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
addr &= ~3;
if (!is_kernel_addr(addr)) {
@@ -2107,8 +2107,14 @@ static void dump_300_sprs(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
return;
- printf("pidr = %.16lx tidr = %.16lx\n",
- mfspr(SPRN_PID), mfspr(SPRN_TIDR));
+ if (cpu_has_feature(CPU_FTR_P9_TIDR)) {
+ printf("pidr = %.16lx tidr = %.16lx\n",
+ mfspr(SPRN_PID), mfspr(SPRN_TIDR));
+ } else {
+ printf("pidr = %.16lx\n",
+ mfspr(SPRN_PID));
+ }
+
printf("psscr = %.16lx\n",
hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR));
@@ -2300,7 +2306,7 @@ mwrite(unsigned long adrs, void *buf, int size)
}
static int
-mread_instr(unsigned long adrs, struct ppc_inst *instr)
+mread_instr(unsigned long adrs, ppc_inst_t *instr)
{
volatile int n;
@@ -2608,7 +2614,7 @@ static void dump_tracing(void)
static void dump_one_paca(int cpu)
{
struct paca_struct *p;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
int i = 0;
#endif
@@ -2650,6 +2656,7 @@ static void dump_one_paca(int cpu)
DUMP(p, cpu_start, "%#-*x");
DUMP(p, kexec_state, "%#-*x");
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!early_radix_enabled()) {
for (i = 0; i < SLB_NUM_BOLTED; i++) {
u64 esid, vsid;
@@ -2677,6 +2684,7 @@ static void dump_one_paca(int cpu)
22, "slb_cache", i, p->slb_cache[i]);
}
}
+#endif
DUMP(p, rfi_flush_fallback_area, "%-*px");
#endif
@@ -2809,12 +2817,12 @@ static void dump_all_xives(void)
{
int cpu;
- if (num_possible_cpus() == 0) {
+ if (num_online_cpus() == 0) {
printf("No possible cpus, use 'dx #' to dump individual cpus\n");
return;
}
- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
dump_one_xive(cpu);
}
@@ -3020,7 +3028,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
{
int nr, dotted;
unsigned long first_adr;
- struct ppc_inst inst, last_inst = ppc_inst(0);
+ ppc_inst_t inst, last_inst = ppc_inst(0);
dotted = 0;
for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) {
@@ -3740,7 +3748,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
printf("%s", after);
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
void dump_segments(void)
{
int i;
@@ -4128,7 +4136,7 @@ struct spu_info {
static struct spu_info spu_info[XMON_NUM_SPUS];
-void xmon_register_spus(struct list_head *list)
+void __init xmon_register_spus(struct list_head *list)
{
struct spu *spu;
diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h
index 57e6fb03de48..377068f52edb 100644
--- a/arch/powerpc/xmon/xmon_bpts.h
+++ b/arch/powerpc/xmon/xmon_bpts.h
@@ -5,8 +5,8 @@
#define NBPTS 256
#ifndef __ASSEMBLY__
#include <asm/inst.h>
-#define BPT_SIZE (sizeof(struct ppc_inst) * 2)
-#define BPT_WORDS (BPT_SIZE / sizeof(struct ppc_inst))
+#define BPT_SIZE (sizeof(ppc_inst_t) * 2)
+#define BPT_WORDS (BPT_SIZE / sizeof(ppc_inst_t))
extern unsigned int bpt_table[NBPTS * BPT_WORDS];
#endif /* __ASSEMBLY__ */
diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c
index eab7e83c11c4..b17660c022eb 100644
--- a/drivers/macintosh/mediabay.c
+++ b/drivers/macintosh/mediabay.c
@@ -703,7 +703,7 @@ static const struct mb_ops keylargo_mb_ops = {
* Therefore we do it all by polling the media bay once each tick.
*/
-static struct of_device_id media_bay_match[] =
+static const struct of_device_id media_bay_match[] =
{
{
.name = "media-bay",
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 51aecafdcbdf..5efc4151bf58 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -6,6 +6,7 @@
config CXL_BASE
bool
select PPC_COPRO_BASE
+ select PPC_64S_HASH_MMU
config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index aa12097668d3..83a7baf5df82 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -11,7 +11,7 @@ lkdtm-$(CONFIG_LKDTM) += usercopy.o
lkdtm-$(CONFIG_LKDTM) += stackleak.o
lkdtm-$(CONFIG_LKDTM) += cfi.o
lkdtm-$(CONFIG_LKDTM) += fortify.o
-lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o
+lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o
KASAN_SANITIZE_rodata.o := n
KASAN_SANITIZE_stackleak.o := n
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 609d9ee2acc0..82fb276f7e09 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -182,7 +182,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(FORTIFIED_SUBOBJECT),
CRASHTYPE(FORTIFIED_STRSCPY),
CRASHTYPE(DOUBLE_FAULT),
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
CRASHTYPE(PPC_SLB_MULTIHIT),
#endif
};
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index e70525eedaae..d881f5e40ad9 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -74,7 +74,6 @@ static long afu_ioctl_attach(struct ocxl_context *ctx,
{
struct ocxl_ioctl_attach arg;
u64 amr = 0;
- int rc;
pr_debug("%s for context %d\n", __func__, ctx->pasid);
@@ -86,8 +85,7 @@ static long afu_ioctl_attach(struct ocxl_context *ctx,
return -EINVAL;
amr = arg.amr & mfspr(SPRN_UAMOR);
- rc = ocxl_context_attach(ctx, amr, current->mm);
- return rc;
+ return ocxl_context_attach(ctx, amr, current->mm);
}
static long afu_ioctl_get_metadata(struct ocxl_context *ctx,
diff --git a/include/linux/cuda.h b/include/linux/cuda.h
index 45bfe9d61271..daf3e6f98444 100644
--- a/include/linux/cuda.h
+++ b/include/linux/cuda.h
@@ -12,7 +12,7 @@
#include <uapi/linux/cuda.h>
-extern int find_via_cuda(void);
+extern int __init find_via_cuda(void);
extern int cuda_request(struct adb_request *req,
void (*done)(struct adb_request *), int nbytes, ...);
extern void cuda_poll(void);
diff --git a/include/linux/pmu.h b/include/linux/pmu.h
index 52453a24a24f..c677442d007c 100644
--- a/include/linux/pmu.h
+++ b/include/linux/pmu.h
@@ -13,7 +13,7 @@
#include <uapi/linux/pmu.h>
-extern int find_via_pmu(void);
+extern int __init find_via_pmu(void);
extern int pmu_request(struct adb_request *req,
void (*done)(struct adb_request *), int nbytes, ...);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index bd8860eeb291..1b65042ab1db 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1332,7 +1332,10 @@ union perf_mem_data_src {
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-/* 2-7 available */
+#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
+#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
+#define PERF_MEM_HOPS_3 0x04 /* remote board */
+/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
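
The new hop-level values extend the existing PERF_MEM_HOPS_0 encoding under PERF_MEM_HOPS_SHIFT. A consumer would shift and mask the field out of the data_src word; a minimal sketch assuming the field is three bits wide (PERF_MEM_HOPS_MASK is a stand-in, not a uapi name):

#include <stdint.h>
#include <stdio.h>

#define PERF_MEM_HOPS_SHIFT 43       /* from the hunk above */
#define PERF_MEM_HOPS_MASK  0x7ULL   /* assumed width: hop levels 0-7 */

static const char *hop_name(uint64_t hops)
{
	switch (hops) {
	case 0x01: return "remote core, same node";
	case 0x02: return "remote node, same socket";
	case 0x03: return "remote socket, same board";
	case 0x04: return "remote board";
	default:   return "unknown/unused";
	}
}

int main(void)
{
	/* Pretend a sample carried hop level "remote socket, same board". */
	uint64_t data_src = (uint64_t)0x03 << PERF_MEM_HOPS_SHIFT;
	uint64_t hops = (data_src >> PERF_MEM_HOPS_SHIFT) & PERF_MEM_HOPS_MASK;

	printf("hops=%llu (%s)\n", (unsigned long long)hops, hop_name(hops));
	return 0;
}
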
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
index b0b20e0b4e30..f43aa4b77fba 100755
--- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh
+++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
@@ -44,7 +44,10 @@ mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush
for m in $mitigations
do
- do_one "$m" &
+ if [[ -f /sys/kernel/debug/powerpc/$m ]]
+ then
+ do_one "$m" &
+ fi
done
echo "Spawned threads enabling/disabling mitigations ..."
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index adc2b7294e5f..83647b8277e7 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -193,7 +193,7 @@ int spectre_v2_test(void)
* We are not vulnerable and reporting otherwise, so
* missing such a mismatch is safe.
*/
- if (state == VULNERABLE)
+ if (miss_percent > 95)
return 4;
return 1;
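
The spectre_v2 selftest change above stops keying the mismatch case on the reported state and instead requires the measured branch miss rate to exceed 95% before flagging it. A toy illustration of that threshold check, with made-up counter values (the real test derives miss_percent from its own counters):

#include <stdio.h>

int main(void)
{
	unsigned long long branches = 1000000, misses = 970000;   /* illustrative */
	int miss_percent = 100 * misses / branches;

	if (miss_percent > 95)
		printf("unexpected: everything mispredicted (%d%%)\n", miss_percent);
	else
		printf("miss rate %d%% within expectation\n", miss_percent);
	return 0;
}
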
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index ce3375cd8e73..9d0915777fed 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -4,3 +4,5 @@ signal_tm
sigfuz
sigreturn_vdso
sig_sc_double_restart
+sigreturn_kernel
+sigreturn_unaligned
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index d6ae54663aed..f679d260afc8 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
+TEST_GEN_PROGS += sigreturn_kernel
+TEST_GEN_PROGS += sigreturn_unaligned
CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
new file mode 100644
index 000000000000..0a1b6e591eee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that we can't sigreturn to kernel addresses, or to kernel mode.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#define MSR_PR (1ul << 14)
+
+static volatile unsigned long long sigreturn_addr;
+static volatile unsigned long long sigreturn_msr_mask;
+
+static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr)
+{
+ ucontext_t *uc = (ucontext_t *)uc_ptr;
+
+ if (sigreturn_addr)
+ UCONTEXT_NIA(uc) = sigreturn_addr;
+
+ if (sigreturn_msr_mask)
+ UCONTEXT_MSR(uc) &= sigreturn_msr_mask;
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid == 0) {
+ raise(SIGUSR1);
+ exit(0);
+ }
+
+ return pid;
+}
+
+static int expect_segv(pid_t pid)
+{
+ int child_ret;
+
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(WIFEXITED(child_ret));
+ FAIL_IF(!WIFSIGNALED(child_ret));
+ FAIL_IF(WTERMSIG(child_ret) != 11);
+
+ return 0;
+}
+
+int test_sigreturn_kernel(void)
+{
+ struct sigaction act;
+ int child_ret, i;
+ pid_t pid;
+
+ act.sa_sigaction = sigusr1_handler;
+ act.sa_flags = SA_SIGINFO;
+ sigemptyset(&act.sa_mask);
+
+ FAIL_IF(sigaction(SIGUSR1, &act, NULL));
+
+ for (i = 0; i < 2; i++) {
+ // Return to kernel
+ sigreturn_addr = 0xcull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to kernel virtual
+ sigreturn_addr = 0xc008ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return out of range
+ sigreturn_addr = 0xc010ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, just below PAGE_OFFSET
+ sigreturn_addr = (0xcull << 60) - (64 * 1024);
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, above TASK_SIZE_4PB
+ sigreturn_addr = 0x1ull << 52;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xd space
+ sigreturn_addr = 0xdull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xe space
+ sigreturn_addr = 0xeull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xf space
+ sigreturn_addr = 0xfull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Attempt to set PR=0 for 2nd loop (should be blocked by kernel)
+ sigreturn_msr_mask = ~MSR_PR;
+ }
+
+ printf("All children killed as expected\n");
+
+ // Don't change address, just MSR, should return to user as normal
+ sigreturn_addr = 0;
+ sigreturn_msr_mask = ~MSR_PR;
+ pid = fork_child();
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(!WIFEXITED(child_ret));
+ FAIL_IF(WIFSIGNALED(child_ret));
+ FAIL_IF(WEXITSTATUS(child_ret) != 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_kernel, "sigreturn_kernel");
+}
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
new file mode 100644
index 000000000000..6e58ee4f0fdf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test sigreturn to an unaligned address, ie. low 2 bits set.
+ * Nothing bad should happen.
+ * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+static void sigusr1_handler(int signo, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = ptr;
+
+ UCONTEXT_NIA(uc) |= 3;
+}
+
+static int test_sigreturn_unaligned(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = sigusr1_handler;
+ action.sa_flags = SA_SIGINFO;
+
+ FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1);
+
+ raise(SIGUSR1);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned");
+}