summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2018-02-05 12:55:38 -0700
committerJens Axboe <axboe@kernel.dk>2018-02-05 12:55:38 -0700
commit9e05c864993c5442227f83ae1694a737d7a102ed (patch)
treee35b60bb3c0c179f147e9acaad5444f1e5d9117e /arch
parent3c15f3f545afa320c5e3822825a9a53c664776b6 (diff)
parent35277995e17919ab838beae765f440674e8576eb (diff)
Merge branch 'master' into test
* master: (688 commits) dt-bindings: mailbox: qcom: Document the APCS clock binding mailbox: qcom: Create APCS child device for clock controller mailbox: qcom: Convert APCS IPC driver to use regmap KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES KVM/x86: Add IBPB support KVM/x86: Update the reverse_cpuid list to include CPUID_7_EDX pinctrl: remove include file from <linux/device.h> firmware: dmi: handle missing DMI data gracefully firmware: dmi_scan: Fix handling of empty DMI strings firmware: dmi_scan: Drop dmi_initialized firmware: dmi: Optimize dmi_matches Revert "defer call to mem_cgroup_sk_alloc()" soreuseport: fix mem leak in reuseport_add_sock() net: qlge: use memmove instead of skb_copy_to_linear_data net: qed: use correct strncpy() size net: cxgb4: avoid memcpy beyond end of source buffer cls_u32: add missing RCU annotation. r8152: set rx mode early when linking on ...
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig11
-rw-r--r--arch/alpha/include/asm/futex.h8
-rw-r--r--arch/alpha/include/asm/processor.h5
-rw-r--r--arch/alpha/include/uapi/asm/termbits.h6
-rw-r--r--arch/alpha/kernel/pci.c2
-rw-r--r--arch/alpha/kernel/pci_impl.h3
-rw-r--r--arch/alpha/kernel/process.c5
-rw-r--r--arch/alpha/kernel/sys_nautilus.c2
-rw-r--r--arch/alpha/kernel/traps.c13
-rw-r--r--arch/arm/Kconfig13
-rw-r--r--arch/arm/Kconfig.debug33
-rw-r--r--arch/arm/Makefile6
-rw-r--r--arch/arm/boot/compressed/string.c5
-rw-r--r--arch/arm/boot/compressed/vmlinux.lds.S8
-rw-r--r--arch/arm/common/bL_switcher_dummy_if.c4
-rw-r--r--arch/arm/common/sa1111.c328
-rw-r--r--arch/arm/include/asm/exception.h3
-rw-r--r--arch/arm/include/asm/glue-cache.h4
-rw-r--r--arch/arm/include/asm/hardware/cache-b15-rac.h10
-rw-r--r--arch/arm/include/asm/hardware/sa1111.h32
-rw-r--r--arch/arm/include/asm/memory.h1
-rw-r--r--arch/arm/include/asm/processor.h10
-rw-r--r--arch/arm/include/asm/ptdump.h43
-rw-r--r--arch/arm/include/asm/sections.h21
-rw-r--r--arch/arm/include/asm/string.h14
-rw-r--r--arch/arm/include/asm/traps.h12
-rw-r--r--arch/arm/include/asm/unified.h77
-rw-r--r--arch/arm/kernel/armksyms.c1
-rw-r--r--arch/arm/kernel/entry-armv.S6
-rw-r--r--arch/arm/kernel/entry-common.S1
-rw-r--r--arch/arm/kernel/head-common.S5
-rw-r--r--arch/arm/kernel/hw_breakpoint.c10
-rw-r--r--arch/arm/kernel/smp.c3
-rw-r--r--arch/arm/kernel/stacktrace.c14
-rw-r--r--arch/arm/kernel/traps.c4
-rw-r--r--arch/arm/kernel/vmlinux-xip.lds.S6
-rw-r--r--arch/arm/kernel/vmlinux.lds.S6
-rw-r--r--arch/arm/lib/Makefile2
-rw-r--r--arch/arm/lib/memzero.S137
-rw-r--r--arch/arm/mach-davinci/board-dm355-evm.c15
-rw-r--r--arch/arm/mach-davinci/board-dm644x-evm.c15
-rw-r--r--arch/arm/mach-sa1100/Kconfig1
-rw-r--r--arch/arm/mach-sa1100/assabet.c71
-rw-r--r--arch/arm/mach-sa1100/neponset.c159
-rw-r--r--arch/arm/mm/Kconfig8
-rw-r--r--arch/arm/mm/Makefile4
-rw-r--r--arch/arm/mm/cache-b15-rac.c356
-rw-r--r--arch/arm/mm/cache-v7.S21
-rw-r--r--arch/arm/mm/dump.c151
-rw-r--r--arch/arm/mm/fault.c5
-rw-r--r--arch/arm/mm/idmap.c4
-rw-r--r--arch/arm/mm/init.c2
-rw-r--r--arch/arm/mm/nommu.c4
-rw-r--r--arch/arm/mm/pmsa-v7.c4
-rw-r--r--arch/arm/mm/proc-v7.S6
-rw-r--r--arch/arm/mm/ptdump_debugfs.c34
-rw-r--r--arch/arm/probes/kprobes/core.c14
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/include/asm/processor.h10
-rw-r--r--arch/microblaze/Kconfig.platform1
-rw-r--r--arch/microblaze/Makefile17
-rw-r--r--arch/microblaze/include/asm/io.h2
-rw-r--r--arch/microblaze/mm/pgtable.c2
-rw-r--r--arch/powerpc/Kconfig19
-rw-r--r--arch/powerpc/Kconfig.debug6
-rw-r--r--arch/powerpc/Makefile1
-rw-r--r--arch/powerpc/boot/Makefile8
-rw-r--r--arch/powerpc/boot/dts/a3m071.dts10
-rw-r--r--arch/powerpc/boot/dts/akebono.dts4
-rw-r--r--arch/powerpc/boot/dts/c2k.dts6
-rw-r--r--arch/powerpc/boot/dts/currituck.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8568mds.dts12
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8569mds.dts20
-rw-r--r--arch/powerpc/boot/dts/fsl/p1021mds.dts6
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025rdb.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025twr.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042d4rdb.dts10
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/t104xrdb.dtsi6
-rw-r--r--arch/powerpc/boot/dts/fsp2.dts6
-rw-r--r--arch/powerpc/boot/dts/gamecube.dts14
-rw-r--r--arch/powerpc/boot/dts/haleakala.dts2
-rw-r--r--arch/powerpc/boot/dts/kilauea.dts4
-rw-r--r--arch/powerpc/boot/dts/kmeter1.dts10
-rw-r--r--arch/powerpc/boot/dts/makalu.dts4
-rw-r--r--arch/powerpc/boot/dts/mpc832x_mds.dts10
-rw-r--r--arch/powerpc/boot/dts/mpc832x_rdb.dts8
-rw-r--r--arch/powerpc/boot/dts/mpc836x_mds.dts8
-rw-r--r--arch/powerpc/boot/dts/sbc8548-altflash.dts8
-rw-r--r--arch/powerpc/boot/dts/sbc8548.dts8
-rw-r--r--arch/powerpc/boot/dts/wii.dts32
-rw-r--r--arch/powerpc/boot/serial.c6
-rw-r--r--arch/powerpc/configs/mpc866_ads_defconfig1
-rw-r--r--arch/powerpc/configs/powernv_defconfig2
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h23
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h20
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h71
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h5
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h10
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h79
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-hash.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h7
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h38
-rw-r--r--arch/powerpc/include/asm/bug.h4
-rw-r--r--arch/powerpc/include/asm/code-patching.h2
-rw-r--r--arch/powerpc/include/asm/cpm.h2
-rw-r--r--arch/powerpc/include/asm/cpm1.h2
-rw-r--r--arch/powerpc/include/asm/cputable.h21
-rw-r--r--arch/powerpc/include/asm/drmem.h102
-rw-r--r--arch/powerpc/include/asm/eeh.h2
-rw-r--r--arch/powerpc/include/asm/exception-64s.h103
-rw-r--r--arch/powerpc/include/asm/firmware.h5
-rw-r--r--arch/powerpc/include/asm/hardirq.h1
-rw-r--r--arch/powerpc/include/asm/head-64.h47
-rw-r--r--arch/powerpc/include/asm/hmi.h4
-rw-r--r--arch/powerpc/include/asm/hugetlb.h3
-rw-r--r--arch/powerpc/include/asm/hw_irq.h161
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h9
-rw-r--r--arch/powerpc/include/asm/irqflags.h14
-rw-r--r--arch/powerpc/include/asm/kexec.h2
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/include/asm/local.h200
-rw-r--r--arch/powerpc/include/asm/machdep.h8
-rw-r--r--arch/powerpc/include/asm/mman.h13
-rw-r--r--arch/powerpc/include/asm/mmu-8xx.h60
-rw-r--r--arch/powerpc/include/asm/mmu.h9
-rw-r--r--arch/powerpc/include/asm/mmu_context.h22
-rw-r--r--arch/powerpc/include/asm/mpic_timer.h8
-rw-r--r--arch/powerpc/include/asm/nmi.h4
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h3
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-8xx.h25
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h27
-rw-r--r--arch/powerpc/include/asm/nohash/pte-book3e.h1
-rw-r--r--arch/powerpc/include/asm/opal-api.h5
-rw-r--r--arch/powerpc/include/asm/opal.h6
-rw-r--r--arch/powerpc/include/asm/paca.h5
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h9
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/pkeys.h218
-rw-r--r--arch/powerpc/include/asm/pnv-ocxl.h36
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h2
-rw-r--r--arch/powerpc/include/asm/processor.h5
-rw-r--r--arch/powerpc/include/asm/prom.h27
-rw-r--r--arch/powerpc/include/asm/pte-common.h37
-rw-r--r--arch/powerpc/include/asm/reg.h6
-rw-r--r--arch/powerpc/include/asm/reg_8xx.h82
-rw-r--r--arch/powerpc/include/asm/systbl.h3
-rw-r--r--arch/powerpc/include/asm/unistd.h6
-rw-r--r--arch/powerpc/include/asm/xive-regs.h35
-rw-r--r--arch/powerpc/include/asm/xive.h40
-rw-r--r--arch/powerpc/include/uapi/asm/elf.h1
-rw-r--r--arch/powerpc/include/uapi/asm/mman.h6
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h3
-rw-r--r--arch/powerpc/kernel/asm-offsets.c7
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S50
-rw-r--r--arch/powerpc/kernel/cputable.c15
-rw-r--r--arch/powerpc/kernel/crash.c16
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c30
-rw-r--r--arch/powerpc/kernel/eeh.c59
-rw-r--r--arch/powerpc/kernel/eeh_driver.c10
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c64
-rw-r--r--arch/powerpc/kernel/entry_32.S10
-rw-r--r--arch/powerpc/kernel/entry_64.S67
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S20
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S118
-rw-r--r--arch/powerpc/kernel/head_64.S11
-rw-r--r--arch/powerpc/kernel/head_8xx.S275
-rw-r--r--arch/powerpc/kernel/idle_book3e.S5
-rw-r--r--arch/powerpc/kernel/idle_power4.S5
-rw-r--r--arch/powerpc/kernel/irq.c29
-rw-r--r--arch/powerpc/kernel/mce.c142
-rw-r--r--arch/powerpc/kernel/mce_power.c115
-rw-r--r--arch/powerpc/kernel/module.lds8
-rw-r--r--arch/powerpc/kernel/module_64.c35
-rw-r--r--arch/powerpc/kernel/optprobes_head.S2
-rw-r--r--arch/powerpc/kernel/paca.c13
-rw-r--r--arch/powerpc/kernel/pci-common.c27
-rw-r--r--arch/powerpc/kernel/pci_dn.c6
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c2
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c2
-rw-r--r--arch/powerpc/kernel/process.c28
-rw-r--r--arch/powerpc/kernel/prom.c115
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/kernel/ptrace.c78
-rw-r--r--arch/powerpc/kernel/rtas-proc.c14
-rw-r--r--arch/powerpc/kernel/rtas_flash.c2
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c21
-rw-r--r--arch/powerpc/kernel/setup.h4
-rw-r--r--arch/powerpc/kernel/setup_64.c48
-rw-r--r--arch/powerpc/kernel/signal_32.c8
-rw-r--r--arch/powerpc/kernel/signal_64.c11
-rw-r--r--arch/powerpc/kernel/smp.c18
-rw-r--r--arch/powerpc/kernel/sysfs.c8
-rw-r--r--arch/powerpc/kernel/time.c6
-rw-r--r--arch/powerpc/kernel/traps.c51
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S67
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kernel/watchdog.c100
-rw-r--r--arch/powerpc/kvm/book3s_hv.c10
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c14
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c9
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S11
-rw-r--r--arch/powerpc/kvm/book3s_xics.c2
-rw-r--r--arch/powerpc/lib/code-patching.c37
-rw-r--r--arch/powerpc/lib/feature-fixups.c8
-rw-r--r--arch/powerpc/mm/8xx_mmu.c4
-rw-r--r--arch/powerpc/mm/Makefile3
-rw-r--r--arch/powerpc/mm/drmem.c439
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c32
-rw-r--r--arch/powerpc/mm/fault.c53
-rw-r--r--arch/powerpc/mm/hash64_4k.c14
-rw-r--r--arch/powerpc/mm/hash64_64k.c123
-rw-r--r--arch/powerpc/mm/hash_native_64.c97
-rw-r--r--arch/powerpc/mm/hash_utils_64.c97
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c16
-rw-r--r--arch/powerpc/mm/hugetlbpage.c8
-rw-r--r--arch/powerpc/mm/init_64.c5
-rw-r--r--arch/powerpc/mm/mem.c3
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c2
-rw-r--r--arch/powerpc/mm/numa.c333
-rw-r--r--arch/powerpc/mm/pgtable-radix.c23
-rw-r--r--arch/powerpc/mm/pgtable.c3
-rw-r--r--arch/powerpc/mm/pgtable_32.c9
-rw-r--r--arch/powerpc/mm/pgtable_64.c14
-rw-r--r--arch/powerpc/mm/pkeys.c468
-rw-r--r--arch/powerpc/mm/subpage-prot.c3
-rw-r--r--arch/powerpc/mm/tlb-radix.c68
-rw-r--r--arch/powerpc/mm/tlb_nohash.c5
-rw-r--r--arch/powerpc/perf/8xx-pmu.c52
-rw-r--r--arch/powerpc/perf/Makefile2
-rw-r--r--arch/powerpc/perf/core-book3s.c2
-rw-r--r--arch/powerpc/perf/imc-pmu.c102
-rw-r--r--arch/powerpc/platforms/44x/fsp2.c259
-rw-r--r--arch/powerpc/platforms/44x/fsp2.h272
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c4
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c52
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_mds.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_rdb.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_mds.c2
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.c7
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_hpcn.c2
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig12
-rw-r--r--arch/powerpc/platforms/Kconfig11
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype8
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c3
-rw-r--r--arch/powerpc/platforms/cell/setup.c3
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c3
-rw-r--r--arch/powerpc/platforms/cell/spu_manage.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h2
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c4
-rw-r--r--arch/powerpc/platforms/powermac/backlight.c6
-rw-r--r--arch/powerpc/platforms/powermac/feature.c3
-rw-r--r--arch/powerpc/platforms/powermac/pic.c8
-rw-r--r--arch/powerpc/platforms/powermac/smp.c4
-rw-r--r--arch/powerpc/platforms/powernv/Makefile1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c101
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c7
-rw-r--r--arch/powerpc/platforms/powernv/ocxl.c515
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c77
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S3
-rw-r--r--arch/powerpc/platforms/powernv/opal.c28
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c88
-rw-r--r--arch/powerpc/platforms/powernv/pci.c4
-rw-r--r--arch/powerpc/platforms/powernv/pci.h8
-rw-r--r--arch/powerpc/platforms/powernv/smp.c28
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c12
-rw-r--r--arch/powerpc/platforms/ps3/mm.c4
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c2
-rw-r--r--arch/powerpc/platforms/ps3/setup.c1
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c16
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c162
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c3
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c522
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c2
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c8
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c8
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c2
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.c60
-rw-r--r--arch/powerpc/platforms/pseries/pci.c177
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c126
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c2
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c176
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c3
-rw-r--r--arch/powerpc/sysdev/Makefile4
-rw-r--r--arch/powerpc/sysdev/axonram.c383
-rw-r--r--arch/powerpc/sysdev/cpm1.c33
-rw-r--r--arch/powerpc/sysdev/cpm2.c11
-rw-r--r--arch/powerpc/sysdev/cpm_common.c5
-rw-r--r--arch/powerpc/sysdev/cpm_gpio.c80
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c16
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c14
-rw-r--r--arch/powerpc/sysdev/mpic.c10
-rw-r--r--arch/powerpc/sysdev/mpic_timer.c55
-rw-r--r--arch/powerpc/sysdev/mv64x60_pci.c2
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c10
-rw-r--r--arch/powerpc/sysdev/xics/ics-opal.c4
-rw-r--r--arch/powerpc/sysdev/xics/ics-rtas.c4
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c8
-rw-r--r--arch/powerpc/sysdev/xive/common.c8
-rw-r--r--arch/powerpc/xmon/ppc-dis.c4
-rw-r--r--arch/powerpc/xmon/xmon.c6
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/common.c9
-rw-r--r--arch/x86/entry/entry_64.S127
-rw-r--r--arch/x86/entry/syscall_64.c7
-rw-r--r--arch/x86/include/asm/barrier.h28
-rw-r--r--arch/x86/include/asm/fixmap.h6
-rw-r--r--arch/x86/include/asm/msr.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h5
-rw-r--r--arch/x86/include/asm/processor.h10
-rw-r--r--arch/x86/include/asm/syscall.h6
-rw-r--r--arch/x86/include/asm/thread_info.h3
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/include/asm/uaccess.h15
-rw-r--r--arch/x86/include/asm/uaccess_32.h6
-rw-r--r--arch/x86/include/asm/uaccess_64.h12
-rw-r--r--arch/x86/kernel/alternative.c14
-rw-r--r--arch/x86/kernel/cpu/bugs.c96
-rw-r--r--arch/x86/kernel/cpu/common.c25
-rw-r--r--arch/x86/kernel/cpu/intel.c27
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S8
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kvm/cpuid.c22
-rw-r--r--arch/x86/kvm/cpuid.h1
-rw-r--r--arch/x86/kvm/svm.c116
-rw-r--r--arch/x86/kvm/vmx.c656
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lib/getuser.S10
-rw-r--r--arch/x86/lib/usercopy_32.c8
-rw-r--r--arch/x86/mm/tlb.c33
-rw-r--r--arch/x86/power/hibernate_32.c2
-rw-r--r--arch/x86/power/hibernate_64.c2
350 files changed, 8211 insertions, 4077 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index d007b2a15b22..467dfa35bf96 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -245,6 +245,17 @@ config ARCH_TASK_STRUCT_ON_STACK
config ARCH_TASK_STRUCT_ALLOCATOR
bool
+config HAVE_ARCH_THREAD_STRUCT_WHITELIST
+ bool
+ depends on !ARCH_TASK_STRUCT_ALLOCATOR
+ help
+ An architecture should select this to provide hardened usercopy
+ knowledge about what region of the thread_struct should be
+ whitelisted for copying to userspace. Normally this is only the
+ FPU registers. Specifically, arch_thread_struct_whitelist()
+ should be implemented. Without this, the entire thread_struct
+ field in task_struct will be left whitelisted.
+
# Select if arch has its private alloc_thread_stack() function
config ARCH_THREAD_STACK_ALLOCATOR
bool
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index d2e4da93e68c..ca3322536f72 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -20,8 +20,8 @@
"3: .subsection 2\n" \
"4: br 1b\n" \
" .previous\n" \
- EXC(1b,3b,%1,$31) \
- EXC(2b,3b,%1,$31) \
+ EXC(1b,3b,$31,%1) \
+ EXC(2b,3b,$31,%1) \
: "=&r" (oldval), "=&r"(ret) \
: "r" (uaddr), "r"(oparg) \
: "memory")
@@ -82,8 +82,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"3: .subsection 2\n"
"4: br 1b\n"
" .previous\n"
- EXC(1b,3b,%0,$31)
- EXC(2b,3b,%0,$31)
+ EXC(1b,3b,$31,%0)
+ EXC(2b,3b,$31,%0)
: "+r"(ret), "=&r"(prev), "=&r"(cmp)
: "r"(uaddr), "r"((long)(int)oldval), "r"(newval)
: "memory");
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index bfe784f2d4af..cb05d045efe3 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -40,15 +40,12 @@ typedef struct {
struct thread_struct { };
#define INIT_THREAD { }
-/* Return saved PC of a blocked thread. */
-struct task_struct;
-extern unsigned long thread_saved_pc(struct task_struct *);
-
/* Do necessary setup to start up a newly executed thread. */
struct pt_regs;
extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
/* Free all resources held by a thread. */
+struct task_struct;
extern void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h
index 05e0398a83a6..de6c8360fbe3 100644
--- a/arch/alpha/include/uapi/asm/termbits.h
+++ b/arch/alpha/include/uapi/asm/termbits.h
@@ -110,7 +110,11 @@ struct ktermios {
#define VTDLY 00200000
#define VT0 00000000
#define VT1 00200000
-#define XTABS 01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. */
+/*
+ * Should be equivalent to TAB3, see description of TAB3 in
+ * POSIX.1-2008, Ch. 11.2.3 "Output Modes"
+ */
+#define XTABS TAB3
/* c_cflag bit meaning */
#define CBAUD 0000037
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 87da00579946..2e86ebb680ae 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -425,7 +425,7 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
if (bus == 0 && dfn == 0) {
hose = pci_isa_hose;
} else {
- dev = pci_get_bus_and_slot(bus, dfn);
+ dev = pci_get_domain_bus_and_slot(0, bus, dfn);
if (!dev)
return -ENODEV;
hose = dev->sysdata;
diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h
index 2e4cb74fdc41..18043af45e2b 100644
--- a/arch/alpha/kernel/pci_impl.h
+++ b/arch/alpha/kernel/pci_impl.h
@@ -144,7 +144,8 @@ struct pci_iommu_arena
};
#if defined(CONFIG_ALPHA_SRM) && \
- (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA))
+ (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \
+ defined(CONFIG_ALPHA_AVANTI))
# define NEED_SRM_SAVE_RESTORE
#else
# undef NEED_SRM_SAVE_RESTORE
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 74bfb1f2d68e..48b81d015d8a 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -269,12 +269,13 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
application calling fork. */
if (clone_flags & CLONE_SETTLS)
childti->pcb.unique = regs->r20;
+ else
+ regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */
childti->pcb.usp = usp ?: rdusp();
*childregs = *regs;
childregs->r0 = 0;
childregs->r19 = 0;
childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */
- regs->r20 = 0;
stack = ((struct switch_stack *) regs) - 1;
*childstack = *stack;
childstack->r26 = (unsigned long) ret_from_fork;
@@ -361,7 +362,7 @@ EXPORT_SYMBOL(dump_elf_task_fp);
* all. -- r~
*/
-unsigned long
+static unsigned long
thread_saved_pc(struct task_struct *t)
{
unsigned long base = (unsigned long)task_stack_page(t);
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 239dc0e601d5..ff4f54b86c7f 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -237,7 +237,7 @@ nautilus_init_pci(void)
bus = hose->bus = bridge->bus;
pcibios_claim_one_bus(bus);
- irongate = pci_get_bus_and_slot(0, 0);
+ irongate = pci_get_domain_bus_and_slot(pci_domain_nr(bus), 0, 0);
bus->self = irongate;
bus->resource[0] = &irongate_io;
bus->resource[1] = &irongate_mem;
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 4bd99a7b1c41..f43bd05dede2 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -160,11 +160,16 @@ void show_stack(struct task_struct *task, unsigned long *sp)
for(i=0; i < kstack_depth_to_print; i++) {
if (((long) stack & (THREAD_SIZE-1)) == 0)
break;
- if (i && ((i % 4) == 0))
- printk("\n ");
- printk("%016lx ", *stack++);
+ if ((i % 4) == 0) {
+ if (i)
+ pr_cont("\n");
+ printk(" ");
+ } else {
+ pr_cont(" ");
+ }
+ pr_cont("%016lx", *stack++);
}
- printk("\n");
+ pr_cont("\n");
dik_show_trace(sp);
}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 11647091fa6d..7e3d53575486 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -3,8 +3,8 @@ config ARM
bool
default y
select ARCH_CLOCKSOURCE_DATA
- select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID
- select ARCH_HAS_DEBUG_VIRTUAL
+ select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID && !KEXEC
+ select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_SET_MEMORY
@@ -51,6 +51,7 @@ config ARM
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARM_SMCCC if CPU_V7
select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
@@ -100,6 +101,7 @@ config ARM
select OLD_SIGACTION
select OLD_SIGSUSPEND3
select PERF_USE_VMALLOC
+ select REFCOUNT_FULL
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
# Above selects are sorted alphabetically; please add new ones
@@ -1526,12 +1528,10 @@ config THUMB2_KERNEL
bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY
depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K
default y if CPU_THUMBONLY
- select ARM_ASM_UNIFIED
select ARM_UNWIND
help
By enabling this option, the kernel will be compiled in
- Thumb-2 mode. A compiler/assembler that understand the unified
- ARM-Thumb syntax is needed.
+ Thumb-2 mode.
If unsure, say N.
@@ -1566,9 +1566,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11
Unless you are sure your tools don't have this problem, say Y.
-config ARM_ASM_UNIFIED
- bool
-
config ARM_PATCH_IDIV
bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()"
depends on CPU_32v7 && !XIP_KERNEL
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 17685e19aed8..78a647080ebc 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -3,10 +3,14 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config ARM_PTDUMP
+config ARM_PTDUMP_CORE
+ def_bool n
+
+config ARM_PTDUMP_DEBUGFS
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
depends on MMU
+ select ARM_PTDUMP_CORE
select DEBUG_FS
---help---
Say Y here if you want to show the kernel pagetable layout in a
@@ -16,6 +20,33 @@ config ARM_PTDUMP
kernel.
If in doubt, say "N"
+config DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ select ARM_PTDUMP_CORE
+ ---help---
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+
+ Look for a message in dmesg output like this:
+
+ arm/mm: Checked W+X mappings: passed, no W+X pages found.
+
+ or like this, if the check failed:
+
+ arm/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
+
+ Note that even if the check fails, your kernel is possibly
+ still fine, as W+X mappings are not a security hole in
+ themselves, what they do is that they make the exploitation
+ of other unfixed kernel bugs easier.
+
+ There is no runtime or memory usage effect of this option
+ once the kernel has booted up - it's a one time check.
+
+ If in doubt, say "Y".
+
# RMK wants arm kernels compiled with frame pointers or stack unwinding.
# If you know what you are doing and are willing to live without stack
# traces, you can get a slightly smaller kernel by setting this option to
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 80351e505fd5..e83f5161fdd8 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -115,9 +115,11 @@ ifeq ($(CONFIG_ARM_UNWIND),y)
CFLAGS_ABI +=-funwind-tables
endif
+# Accept old syntax despite ".syntax unified"
+AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
+
ifeq ($(CONFIG_THUMB2_KERNEL),y)
AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it)
-AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb
# Work around buggy relocation from gas if requested:
@@ -125,7 +127,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y)
KBUILD_CFLAGS_MODULE +=-fno-optimize-sibling-calls
endif
else
-CFLAGS_ISA :=$(call cc-option,-marm,)
+CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN)
AFLAGS_ISA :=$(CFLAGS_ISA)
endif
diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c
index 309e1bbad75d..13c90abc68d6 100644
--- a/arch/arm/boot/compressed/string.c
+++ b/arch/arm/boot/compressed/string.c
@@ -130,8 +130,3 @@ void *memset(void *s, int c, size_t count)
*xs++ = c;
return s;
}
-
-void __memzero(void *s, size_t count)
-{
- memset(s, 0, count);
-}
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index e6bf6774c4bb..2b963d8e76dd 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -56,6 +56,7 @@ SECTIONS
.rodata : {
*(.rodata)
*(.rodata.*)
+ *(.data.rel.ro)
}
.piggydata : {
*(.piggydata)
@@ -101,6 +102,12 @@ SECTIONS
* this symbol allows further debug in the near future.
*/
.image_end (NOLOAD) : {
+ /*
+ * EFI requires that the image is aligned to 512 bytes, and appended
+ * DTB requires that we know where the end of the image is. Ensure
+ * that both are satisfied by ensuring that there are no additional
+ * sections emitted into the decompressor image.
+ */
_edata_real = .;
}
@@ -128,3 +135,4 @@ SECTIONS
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
}
+ASSERT(_edata_real == _edata, "error: zImage file size is incorrect");
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
index 4c10c6452678..f4dc1714a79e 100644
--- a/arch/arm/common/bL_switcher_dummy_if.c
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -57,3 +57,7 @@ static struct miscdevice bL_switcher_device = {
&bL_switcher_fops
};
module_misc_device(bL_switcher_device);
+
+MODULE_AUTHOR("Nicolas Pitre <nico@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("big.LITTLE switcher dummy user interface");
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 4ecd5120fce7..a2c878769eaf 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -108,6 +108,7 @@ struct sa1111 {
spinlock_t lock;
void __iomem *base;
struct sa1111_platform_data *pdata;
+ struct irq_domain *irqdomain;
struct gpio_chip gc;
#ifdef CONFIG_PM
void *saved_state;
@@ -125,7 +126,7 @@ struct sa1111_dev_info {
unsigned long skpcr_mask;
bool dma;
unsigned int devid;
- unsigned int irq[6];
+ unsigned int hwirq[6];
};
static struct sa1111_dev_info sa1111_devices[] = {
@@ -134,7 +135,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.skpcr_mask = SKPCR_UCLKEN,
.dma = true,
.devid = SA1111_DEVID_USB,
- .irq = {
+ .hwirq = {
IRQ_USBPWR,
IRQ_HCIM,
IRQ_HCIBUFFACC,
@@ -148,7 +149,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.skpcr_mask = SKPCR_I2SCLKEN | SKPCR_L3CLKEN,
.dma = true,
.devid = SA1111_DEVID_SAC,
- .irq = {
+ .hwirq = {
AUDXMTDMADONEA,
AUDXMTDMADONEB,
AUDRCVDMADONEA,
@@ -164,7 +165,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = SA1111_KBD,
.skpcr_mask = SKPCR_PTCLKEN,
.devid = SA1111_DEVID_PS2_KBD,
- .irq = {
+ .hwirq = {
IRQ_TPRXINT,
IRQ_TPTXINT
},
@@ -173,7 +174,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = SA1111_MSE,
.skpcr_mask = SKPCR_PMCLKEN,
.devid = SA1111_DEVID_PS2_MSE,
- .irq = {
+ .hwirq = {
IRQ_MSRXINT,
IRQ_MSTXINT
},
@@ -182,7 +183,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = 0x1800,
.skpcr_mask = 0,
.devid = SA1111_DEVID_PCMCIA,
- .irq = {
+ .hwirq = {
IRQ_S0_READY_NINT,
IRQ_S0_CD_VALID,
IRQ_S0_BVD1_STSCHG,
@@ -193,6 +194,19 @@ static struct sa1111_dev_info sa1111_devices[] = {
},
};
+static int sa1111_map_irq(struct sa1111 *sachip, irq_hw_number_t hwirq)
+{
+ return irq_create_mapping(sachip->irqdomain, hwirq);
+}
+
+static void sa1111_handle_irqdomain(struct irq_domain *irqdomain, int irq)
+{
+ struct irq_desc *d = irq_to_desc(irq_linear_revmap(irqdomain, irq));
+
+ if (d)
+ generic_handle_irq_desc(d);
+}
+
/*
* SA1111 interrupt support. Since clearing an IRQ while there are
* active IRQs causes the interrupt output to pulse, the upper levels
@@ -202,49 +216,45 @@ static void sa1111_irq_handler(struct irq_desc *desc)
{
unsigned int stat0, stat1, i;
struct sa1111 *sachip = irq_desc_get_handler_data(desc);
+ struct irq_domain *irqdomain;
void __iomem *mapbase = sachip->base + SA1111_INTC;
- stat0 = sa1111_readl(mapbase + SA1111_INTSTATCLR0);
- stat1 = sa1111_readl(mapbase + SA1111_INTSTATCLR1);
+ stat0 = readl_relaxed(mapbase + SA1111_INTSTATCLR0);
+ stat1 = readl_relaxed(mapbase + SA1111_INTSTATCLR1);
- sa1111_writel(stat0, mapbase + SA1111_INTSTATCLR0);
+ writel_relaxed(stat0, mapbase + SA1111_INTSTATCLR0);
desc->irq_data.chip->irq_ack(&desc->irq_data);
- sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1);
+ writel_relaxed(stat1, mapbase + SA1111_INTSTATCLR1);
if (stat0 == 0 && stat1 == 0) {
do_bad_IRQ(desc);
return;
}
+ irqdomain = sachip->irqdomain;
+
for (i = 0; stat0; i++, stat0 >>= 1)
if (stat0 & 1)
- generic_handle_irq(i + sachip->irq_base);
+ sa1111_handle_irqdomain(irqdomain, i);
for (i = 32; stat1; i++, stat1 >>= 1)
if (stat1 & 1)
- generic_handle_irq(i + sachip->irq_base);
+ sa1111_handle_irqdomain(irqdomain, i);
/* For level-based interrupts */
desc->irq_data.chip->irq_unmask(&desc->irq_data);
}
-#define SA1111_IRQMASK_LO(x) (1 << (x - sachip->irq_base))
-#define SA1111_IRQMASK_HI(x) (1 << (x - sachip->irq_base - 32))
-
static u32 sa1111_irqmask(struct irq_data *d)
{
- struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-
- return BIT((d->irq - sachip->irq_base) & 31);
+ return BIT(irqd_to_hwirq(d) & 31);
}
static int sa1111_irqbank(struct irq_data *d)
{
- struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-
- return ((d->irq - sachip->irq_base) / 32) * 4;
+ return (irqd_to_hwirq(d) / 32) * 4;
}
static void sa1111_ack_irq(struct irq_data *d)
@@ -257,9 +267,9 @@ static void sa1111_mask_irq(struct irq_data *d)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 ie;
- ie = sa1111_readl(mapbase + SA1111_INTEN0);
+ ie = readl_relaxed(mapbase + SA1111_INTEN0);
ie &= ~sa1111_irqmask(d);
- sa1111_writel(ie, mapbase + SA1111_INTEN0);
+ writel(ie, mapbase + SA1111_INTEN0);
}
static void sa1111_unmask_irq(struct irq_data *d)
@@ -268,9 +278,9 @@ static void sa1111_unmask_irq(struct irq_data *d)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 ie;
- ie = sa1111_readl(mapbase + SA1111_INTEN0);
+ ie = readl_relaxed(mapbase + SA1111_INTEN0);
ie |= sa1111_irqmask(d);
- sa1111_writel(ie, mapbase + SA1111_INTEN0);
+ writel_relaxed(ie, mapbase + SA1111_INTEN0);
}
/*
@@ -287,11 +297,11 @@ static int sa1111_retrigger_irq(struct irq_data *d)
u32 ip, mask = sa1111_irqmask(d);
int i;
- ip = sa1111_readl(mapbase + SA1111_INTPOL0);
+ ip = readl_relaxed(mapbase + SA1111_INTPOL0);
for (i = 0; i < 8; i++) {
- sa1111_writel(ip ^ mask, mapbase + SA1111_INTPOL0);
- sa1111_writel(ip, mapbase + SA1111_INTPOL0);
- if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask)
+ writel_relaxed(ip ^ mask, mapbase + SA1111_INTPOL0);
+ writel_relaxed(ip, mapbase + SA1111_INTPOL0);
+ if (readl_relaxed(mapbase + SA1111_INTSTATCLR0) & mask)
break;
}
@@ -313,13 +323,13 @@ static int sa1111_type_irq(struct irq_data *d, unsigned int flags)
if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0)
return -EINVAL;
- ip = sa1111_readl(mapbase + SA1111_INTPOL0);
+ ip = readl_relaxed(mapbase + SA1111_INTPOL0);
if (flags & IRQ_TYPE_EDGE_RISING)
ip &= ~mask;
else
ip |= mask;
- sa1111_writel(ip, mapbase + SA1111_INTPOL0);
- sa1111_writel(ip, mapbase + SA1111_WAKEPOL0);
+ writel_relaxed(ip, mapbase + SA1111_INTPOL0);
+ writel_relaxed(ip, mapbase + SA1111_WAKEPOL0);
return 0;
}
@@ -330,12 +340,12 @@ static int sa1111_wake_irq(struct irq_data *d, unsigned int on)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 we, mask = sa1111_irqmask(d);
- we = sa1111_readl(mapbase + SA1111_WAKEEN0);
+ we = readl_relaxed(mapbase + SA1111_WAKEEN0);
if (on)
we |= mask;
else
we &= ~mask;
- sa1111_writel(we, mapbase + SA1111_WAKEEN0);
+ writel_relaxed(we, mapbase + SA1111_WAKEEN0);
return 0;
}
@@ -350,10 +360,30 @@ static struct irq_chip sa1111_irq_chip = {
.irq_set_wake = sa1111_wake_irq,
};
+static int sa1111_irqdomain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct sa1111 *sachip = d->host_data;
+
+ /* Disallow unavailable interrupts */
+ if (hwirq > SSPROR && hwirq < AUDXMTDMADONEA)
+ return -EINVAL;
+
+ irq_set_chip_data(irq, sachip);
+ irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
+ irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+
+ return 0;
+}
+
+static const struct irq_domain_ops sa1111_irqdomain_ops = {
+ .map = sa1111_irqdomain_map,
+ .xlate = irq_domain_xlate_twocell,
+};
+
static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
{
void __iomem *irqbase = sachip->base + SA1111_INTC;
- unsigned i, irq;
int ret;
/*
@@ -373,38 +403,40 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
sachip->irq_base = ret;
/* disable all IRQs */
- sa1111_writel(0, irqbase + SA1111_INTEN0);
- sa1111_writel(0, irqbase + SA1111_INTEN1);
- sa1111_writel(0, irqbase + SA1111_WAKEEN0);
- sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+ writel_relaxed(0, irqbase + SA1111_INTEN0);
+ writel_relaxed(0, irqbase + SA1111_INTEN1);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN0);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN1);
/*
* detect on rising edge. Note: Feb 2001 Errata for SA1111
* specifies that S0ReadyInt and S1ReadyInt should be '1'.
*/
- sa1111_writel(0, irqbase + SA1111_INTPOL0);
- sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) |
- BIT(IRQ_S1_READY_NINT & 31),
- irqbase + SA1111_INTPOL1);
+ writel_relaxed(0, irqbase + SA1111_INTPOL0);
+ writel_relaxed(BIT(IRQ_S0_READY_NINT & 31) |
+ BIT(IRQ_S1_READY_NINT & 31),
+ irqbase + SA1111_INTPOL1);
/* clear all IRQs */
- sa1111_writel(~0, irqbase + SA1111_INTSTATCLR0);
- sa1111_writel(~0, irqbase + SA1111_INTSTATCLR1);
-
- for (i = IRQ_GPAIN0; i <= SSPROR; i++) {
- irq = sachip->irq_base + i;
- irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
- irq_set_chip_data(irq, sachip);
- irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
- }
+ writel_relaxed(~0, irqbase + SA1111_INTSTATCLR0);
+ writel_relaxed(~0, irqbase + SA1111_INTSTATCLR1);
- for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) {
- irq = sachip->irq_base + i;
- irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
- irq_set_chip_data(irq, sachip);
- irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+ sachip->irqdomain = irq_domain_add_linear(NULL, SA1111_IRQ_NR,
+ &sa1111_irqdomain_ops,
+ sachip);
+ if (!sachip->irqdomain) {
+ irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+ return -ENOMEM;
}
+ irq_domain_associate_many(sachip->irqdomain,
+ sachip->irq_base + IRQ_GPAIN0,
+ IRQ_GPAIN0, SSPROR + 1 - IRQ_GPAIN0);
+ irq_domain_associate_many(sachip->irqdomain,
+ sachip->irq_base + AUDXMTDMADONEA,
+ AUDXMTDMADONEA,
+ IRQ_S1_BVD1_STSCHG + 1 - AUDXMTDMADONEA);
+
/*
* Register SA1111 interrupt
*/
@@ -420,20 +452,22 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
static void sa1111_remove_irq(struct sa1111 *sachip)
{
+ struct irq_domain *domain = sachip->irqdomain;
void __iomem *irqbase = sachip->base + SA1111_INTC;
+ int i;
/* disable all IRQs */
- sa1111_writel(0, irqbase + SA1111_INTEN0);
- sa1111_writel(0, irqbase + SA1111_INTEN1);
- sa1111_writel(0, irqbase + SA1111_WAKEEN0);
- sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+ writel_relaxed(0, irqbase + SA1111_INTEN0);
+ writel_relaxed(0, irqbase + SA1111_INTEN1);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN0);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN1);
- if (sachip->irq != NO_IRQ) {
- irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
- irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+ irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
+ for (i = 0; i < SA1111_IRQ_NR; i++)
+ irq_dispose_mapping(irq_find_mapping(domain, i));
+ irq_domain_remove(domain);
- release_mem_region(sachip->phys + SA1111_INTC, 512);
- }
+ release_mem_region(sachip->phys + SA1111_INTC, 512);
}
enum {
@@ -572,7 +606,7 @@ static int sa1111_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
{
struct sa1111 *sachip = gc_to_sa1111(gc);
- return sachip->irq_base + offset;
+ return sa1111_map_irq(sachip, offset);
}
static int sa1111_setup_gpios(struct sa1111 *sachip)
@@ -618,11 +652,11 @@ static void sa1111_wake(struct sa1111 *sachip)
/*
* Turn VCO on, and disable PLL Bypass.
*/
- r = sa1111_readl(sachip->base + SA1111_SKCR);
+ r = readl_relaxed(sachip->base + SA1111_SKCR);
r &= ~SKCR_VCO_OFF;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
r |= SKCR_PLL_BYPASS | SKCR_OE_EN;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
/*
* Wait lock time. SA1111 manual _doesn't_
@@ -634,7 +668,7 @@ static void sa1111_wake(struct sa1111 *sachip)
* Enable RCLK. We also ensure that RDYEN is set.
*/
r |= SKCR_RCLKEN | SKCR_RDYEN;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
/*
* Wait 14 RCLK cycles for the chip to finish coming out
@@ -645,7 +679,7 @@ static void sa1111_wake(struct sa1111 *sachip)
/*
* Ensure all clocks are initially off.
*/
- sa1111_writel(0, sachip->base + SA1111_SKPCR);
+ writel_relaxed(0, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
@@ -675,7 +709,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac,
if (cas_latency == 3)
smcr |= SMCR_CLAT;
- sa1111_writel(smcr, sachip->base + SA1111_SMCR);
+ writel_relaxed(smcr, sachip->base + SA1111_SMCR);
/*
* Now clear the bits in the DMA mask to work around the SA1111
@@ -723,8 +757,8 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
dev->mapbase = sachip->base + info->offset;
dev->skpcr_mask = info->skpcr_mask;
- for (i = 0; i < ARRAY_SIZE(info->irq); i++)
- dev->irq[i] = sachip->irq_base + info->irq[i];
+ for (i = 0; i < ARRAY_SIZE(info->hwirq); i++)
+ dev->hwirq[i] = info->hwirq[i];
/*
* If the parent device has a DMA mask associated with it, and
@@ -814,7 +848,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
/*
* Probe for the chip. Only touch the SBI registers.
*/
- id = sa1111_readl(sachip->base + SA1111_SKID);
+ id = readl_relaxed(sachip->base + SA1111_SKID);
if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
printk(KERN_DEBUG "SA1111 not detected: ID = %08lx\n", id);
ret = -ENODEV;
@@ -833,11 +867,9 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
* The interrupt controller must be initialised before any
* other device to ensure that the interrupts are available.
*/
- if (sachip->irq != NO_IRQ) {
- ret = sa1111_setup_irq(sachip, pd->irq_base);
- if (ret)
- goto err_clk;
- }
+ ret = sa1111_setup_irq(sachip, pd->irq_base);
+ if (ret)
+ goto err_clk;
/* Setup the GPIOs - should really be done after the IRQ setup */
ret = sa1111_setup_gpios(sachip);
@@ -864,8 +896,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
* DMA. It can otherwise be held firmly in the off position.
* (currently, we always enable it.)
*/
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR);
/*
* Enable the SA1110 memory bus request and grant signals.
@@ -962,31 +994,31 @@ static int sa1111_suspend_noirq(struct device *dev)
* Save state.
*/
base = sachip->base;
- save->skcr = sa1111_readl(base + SA1111_SKCR);
- save->skpcr = sa1111_readl(base + SA1111_SKPCR);
- save->skcdr = sa1111_readl(base + SA1111_SKCDR);
- save->skaud = sa1111_readl(base + SA1111_SKAUD);
- save->skpwm0 = sa1111_readl(base + SA1111_SKPWM0);
- save->skpwm1 = sa1111_readl(base + SA1111_SKPWM1);
+ save->skcr = readl_relaxed(base + SA1111_SKCR);
+ save->skpcr = readl_relaxed(base + SA1111_SKPCR);
+ save->skcdr = readl_relaxed(base + SA1111_SKCDR);
+ save->skaud = readl_relaxed(base + SA1111_SKAUD);
+ save->skpwm0 = readl_relaxed(base + SA1111_SKPWM0);
+ save->skpwm1 = readl_relaxed(base + SA1111_SKPWM1);
- sa1111_writel(0, sachip->base + SA1111_SKPWM0);
- sa1111_writel(0, sachip->base + SA1111_SKPWM1);
+ writel_relaxed(0, sachip->base + SA1111_SKPWM0);
+ writel_relaxed(0, sachip->base + SA1111_SKPWM1);
base = sachip->base + SA1111_INTC;
- save->intpol0 = sa1111_readl(base + SA1111_INTPOL0);
- save->intpol1 = sa1111_readl(base + SA1111_INTPOL1);
- save->inten0 = sa1111_readl(base + SA1111_INTEN0);
- save->inten1 = sa1111_readl(base + SA1111_INTEN1);
- save->wakepol0 = sa1111_readl(base + SA1111_WAKEPOL0);
- save->wakepol1 = sa1111_readl(base + SA1111_WAKEPOL1);
- save->wakeen0 = sa1111_readl(base + SA1111_WAKEEN0);
- save->wakeen1 = sa1111_readl(base + SA1111_WAKEEN1);
+ save->intpol0 = readl_relaxed(base + SA1111_INTPOL0);
+ save->intpol1 = readl_relaxed(base + SA1111_INTPOL1);
+ save->inten0 = readl_relaxed(base + SA1111_INTEN0);
+ save->inten1 = readl_relaxed(base + SA1111_INTEN1);
+ save->wakepol0 = readl_relaxed(base + SA1111_WAKEPOL0);
+ save->wakepol1 = readl_relaxed(base + SA1111_WAKEPOL1);
+ save->wakeen0 = readl_relaxed(base + SA1111_WAKEEN0);
+ save->wakeen1 = readl_relaxed(base + SA1111_WAKEEN1);
/*
* Disable.
*/
- val = sa1111_readl(sachip->base + SA1111_SKCR);
- sa1111_writel(val | SKCR_SLEEP, sachip->base + SA1111_SKCR);
+ val = readl_relaxed(sachip->base + SA1111_SKCR);
+ writel_relaxed(val | SKCR_SLEEP, sachip->base + SA1111_SKCR);
clk_disable(sachip->clk);
@@ -1023,7 +1055,7 @@ static int sa1111_resume_noirq(struct device *dev)
* Ensure that the SA1111 is still here.
* FIXME: shouldn't do this here.
*/
- id = sa1111_readl(sachip->base + SA1111_SKID);
+ id = readl_relaxed(sachip->base + SA1111_SKID);
if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
__sa1111_remove(sachip);
dev_set_drvdata(dev, NULL);
@@ -1047,26 +1079,26 @@ static int sa1111_resume_noirq(struct device *dev)
*/
spin_lock_irqsave(&sachip->lock, flags);
- sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN0);
- sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN1);
+ writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN0);
+ writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN1);
base = sachip->base;
- sa1111_writel(save->skcr, base + SA1111_SKCR);
- sa1111_writel(save->skpcr, base + SA1111_SKPCR);
- sa1111_writel(save->skcdr, base + SA1111_SKCDR);
- sa1111_writel(save->skaud, base + SA1111_SKAUD);
- sa1111_writel(save->skpwm0, base + SA1111_SKPWM0);
- sa1111_writel(save->skpwm1, base + SA1111_SKPWM1);
+ writel_relaxed(save->skcr, base + SA1111_SKCR);
+ writel_relaxed(save->skpcr, base + SA1111_SKPCR);
+ writel_relaxed(save->skcdr, base + SA1111_SKCDR);
+ writel_relaxed(save->skaud, base + SA1111_SKAUD);
+ writel_relaxed(save->skpwm0, base + SA1111_SKPWM0);
+ writel_relaxed(save->skpwm1, base + SA1111_SKPWM1);
base = sachip->base + SA1111_INTC;
- sa1111_writel(save->intpol0, base + SA1111_INTPOL0);
- sa1111_writel(save->intpol1, base + SA1111_INTPOL1);
- sa1111_writel(save->inten0, base + SA1111_INTEN0);
- sa1111_writel(save->inten1, base + SA1111_INTEN1);
- sa1111_writel(save->wakepol0, base + SA1111_WAKEPOL0);
- sa1111_writel(save->wakepol1, base + SA1111_WAKEPOL1);
- sa1111_writel(save->wakeen0, base + SA1111_WAKEEN0);
- sa1111_writel(save->wakeen1, base + SA1111_WAKEEN1);
+ writel_relaxed(save->intpol0, base + SA1111_INTPOL0);
+ writel_relaxed(save->intpol1, base + SA1111_INTPOL1);
+ writel_relaxed(save->inten0, base + SA1111_INTEN0);
+ writel_relaxed(save->inten1, base + SA1111_INTEN1);
+ writel_relaxed(save->wakepol0, base + SA1111_WAKEPOL0);
+ writel_relaxed(save->wakepol1, base + SA1111_WAKEPOL1);
+ writel_relaxed(save->wakeen0, base + SA1111_WAKEEN0);
+ writel_relaxed(save->wakeen1, base + SA1111_WAKEEN1);
spin_unlock_irqrestore(&sachip->lock, flags);
@@ -1153,7 +1185,7 @@ static unsigned int __sa1111_pll_clock(struct sa1111 *sachip)
{
unsigned int skcdr, fbdiv, ipdiv, opdiv;
- skcdr = sa1111_readl(sachip->base + SA1111_SKCDR);
+ skcdr = readl_relaxed(sachip->base + SA1111_SKCDR);
fbdiv = (skcdr & 0x007f) + 2;
ipdiv = ((skcdr & 0x0f80) >> 7) + 2;
@@ -1195,13 +1227,13 @@ void sa1111_select_audio_mode(struct sa1111_dev *sadev, int mode)
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKCR);
+ val = readl_relaxed(sachip->base + SA1111_SKCR);
if (mode == SA1111_AUDIO_I2S) {
val &= ~SKCR_SELAC;
} else {
val |= SKCR_SELAC;
}
- sa1111_writel(val, sachip->base + SA1111_SKCR);
+ writel_relaxed(val, sachip->base + SA1111_SKCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
@@ -1226,7 +1258,7 @@ int sa1111_set_audio_rate(struct sa1111_dev *sadev, int rate)
if (div > 128)
div = 128;
- sa1111_writel(div - 1, sachip->base + SA1111_SKAUD);
+ writel_relaxed(div - 1, sachip->base + SA1111_SKAUD);
return 0;
}
@@ -1244,7 +1276,7 @@ int sa1111_get_audio_rate(struct sa1111_dev *sadev)
if (sadev->devid != SA1111_DEVID_SAC)
return -EINVAL;
- div = sa1111_readl(sachip->base + SA1111_SKAUD) + 1;
+ div = readl_relaxed(sachip->base + SA1111_SKAUD) + 1;
return __sa1111_pll_clock(sachip) / (256 * div);
}
@@ -1261,10 +1293,10 @@ void sa1111_set_io_dir(struct sa1111_dev *sadev,
#define MODIFY_BITS(port, mask, dir) \
if (mask) { \
- val = sa1111_readl(port); \
+ val = readl_relaxed(port); \
val &= ~(mask); \
val |= (dir) & (mask); \
- sa1111_writel(val, port); \
+ writel_relaxed(val, port); \
}
spin_lock_irqsave(&sachip->lock, flags);
@@ -1329,8 +1361,8 @@ int sa1111_enable_device(struct sa1111_dev *sadev)
if (ret == 0) {
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
return ret;
@@ -1348,8 +1380,8 @@ void sa1111_disable_device(struct sa1111_dev *sadev)
unsigned int val;
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
if (sachip->pdata && sachip->pdata->disable)
@@ -1359,9 +1391,10 @@ EXPORT_SYMBOL(sa1111_disable_device);
int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num)
{
- if (num >= ARRAY_SIZE(sadev->irq))
+ struct sa1111 *sachip = sa1111_chip_driver(sadev);
+ if (num >= ARRAY_SIZE(sadev->hwirq))
return -EINVAL;
- return sadev->irq[num];
+ return sa1111_map_irq(sachip, sadev->hwirq[num]);
}
EXPORT_SYMBOL_GPL(sa1111_get_irq);
@@ -1379,36 +1412,6 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv)
return !!(dev->devid & drv->devid);
}
-static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
-{
- struct sa1111_dev *sadev = to_sa1111_device(dev);
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
- int ret = 0;
-
- if (drv && drv->suspend)
- ret = drv->suspend(sadev, state);
- return ret;
-}
-
-static int sa1111_bus_resume(struct device *dev)
-{
- struct sa1111_dev *sadev = to_sa1111_device(dev);
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
- int ret = 0;
-
- if (drv && drv->resume)
- ret = drv->resume(sadev);
- return ret;
-}
-
-static void sa1111_bus_shutdown(struct device *dev)
-{
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
-
- if (drv && drv->shutdown)
- drv->shutdown(to_sa1111_device(dev));
-}
-
static int sa1111_bus_probe(struct device *dev)
{
struct sa1111_dev *sadev = to_sa1111_device(dev);
@@ -1436,9 +1439,6 @@ struct bus_type sa1111_bus_type = {
.match = sa1111_match,
.probe = sa1111_bus_probe,
.remove = sa1111_bus_remove,
- .suspend = sa1111_bus_suspend,
- .resume = sa1111_bus_resume,
- .shutdown = sa1111_bus_shutdown,
};
EXPORT_SYMBOL(sa1111_bus_type);
diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h
index a7273ad9587a..58e039a851af 100644
--- a/arch/arm/include/asm/exception.h
+++ b/arch/arm/include/asm/exception.h
@@ -10,11 +10,10 @@
#include <linux/interrupt.h>
-#define __exception __attribute__((section(".exception.text")))
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
#else
-#define __exception_irq_entry __exception
+#define __exception_irq_entry
#endif
#endif /* __ASM_ARM_EXCEPTION_H */
diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index 01c3d92624e5..8d1f498e5dd8 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -117,6 +117,10 @@
# endif
#endif
+#if defined(CONFIG_CACHE_B15_RAC)
+# define MULTI_CACHE 1
+#endif
+
#if defined(CONFIG_CPU_V7M)
# define MULTI_CACHE 1
#endif
diff --git a/arch/arm/include/asm/hardware/cache-b15-rac.h b/arch/arm/include/asm/hardware/cache-b15-rac.h
new file mode 100644
index 000000000000..3d43ec06fd35
--- /dev/null
+++ b/arch/arm/include/asm/hardware/cache-b15-rac.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
+#define __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
+
+#ifndef __ASSEMBLY__
+
+void b15_flush_kern_cache_all(void);
+
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h
index 0bbf163d1ed3..798e520e8a49 100644
--- a/arch/arm/include/asm/hardware/sa1111.h
+++ b/arch/arm/include/asm/hardware/sa1111.h
@@ -16,33 +16,6 @@
#include <mach/bitfield.h>
/*
- * The SA1111 is always located at virtual 0xf4000000, and is always
- * "native" endian.
- */
-
-#define SA1111_VBASE 0xf4000000
-
-/* Don't use these! */
-#define SA1111_p2v( x ) ((x) - SA1111_BASE + SA1111_VBASE)
-#define SA1111_v2p( x ) ((x) - SA1111_VBASE + SA1111_BASE)
-
-#ifndef __ASSEMBLY__
-#define _SA1111(x) ((x) + sa1111->resource.start)
-#endif
-
-#define sa1111_writel(val,addr) __raw_writel(val, addr)
-#define sa1111_readl(addr) __raw_readl(addr)
-
-/*
- * 26 bits of the SA-1110 address bus are available to the SA-1111.
- * Use these when feeding target addresses to the DMA engines.
- */
-
-#define SA1111_ADDR_WIDTH (26)
-#define SA1111_ADDR_MASK ((1<<SA1111_ADDR_WIDTH)-1)
-#define SA1111_DMA_ADDR(x) ((x)&SA1111_ADDR_MASK)
-
-/*
* Don't ask the (SAC) DMA engines to move less than this amount.
*/
@@ -417,7 +390,7 @@ struct sa1111_dev {
struct resource res;
void __iomem *mapbase;
unsigned int skpcr_mask;
- unsigned int irq[6];
+ unsigned int hwirq[6];
u64 dma_mask;
};
@@ -431,9 +404,6 @@ struct sa1111_driver {
unsigned int devid;
int (*probe)(struct sa1111_dev *);
int (*remove)(struct sa1111_dev *);
- int (*suspend)(struct sa1111_dev *, pm_message_t);
- int (*resume)(struct sa1111_dev *);
- void (*shutdown)(struct sa1111_dev *);
};
#define SA1111_DRV(_d) container_of((_d), struct sa1111_driver, drv)
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 1f54e4e98c1e..496667703693 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -88,6 +88,7 @@
#else /* CONFIG_MMU */
#ifndef __ASSEMBLY__
+extern unsigned long setup_vectors_base(void);
extern unsigned long vectors_base;
#define VECTORS_BASE vectors_base
#endif
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 338cbe0a18ef..1bf65b47808a 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -45,6 +45,16 @@ struct thread_struct {
struct debug_info debug;
};
+/*
+ * Everything usercopied to/from thread_struct is statically-sized, so
+ * no hardened usercopy whitelist is needed.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = *size = 0;
+}
+
#define INIT_THREAD { }
#define start_thread(regs,pc,sp) \
diff --git a/arch/arm/include/asm/ptdump.h b/arch/arm/include/asm/ptdump.h
new file mode 100644
index 000000000000..3ebf9718288d
--- /dev/null
+++ b/arch/arm/include/asm/ptdump.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2014 ARM Ltd. */
+#ifndef __ASM_PTDUMP_H
+#define __ASM_PTDUMP_H
+
+#ifdef CONFIG_ARM_PTDUMP_CORE
+
+#include <linux/mm_types.h>
+#include <linux/seq_file.h>
+
+struct addr_marker {
+ unsigned long start_address;
+ char *name;
+};
+
+struct ptdump_info {
+ struct mm_struct *mm;
+ const struct addr_marker *markers;
+ unsigned long base_addr;
+};
+
+void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info);
+#ifdef CONFIG_ARM_PTDUMP_DEBUGFS
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+#else
+static inline int ptdump_debugfs_register(struct ptdump_info *info,
+ const char *name)
+{
+ return 0;
+}
+#endif /* CONFIG_ARM_PTDUMP_DEBUGFS */
+
+void ptdump_check_wx(void);
+
+#endif /* CONFIG_ARM_PTDUMP_CORE */
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_check_wx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
+
+#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm/include/asm/sections.h b/arch/arm/include/asm/sections.h
index 63dfe1f10335..4ceb4f757d4d 100644
--- a/arch/arm/include/asm/sections.h
+++ b/arch/arm/include/asm/sections.h
@@ -6,4 +6,25 @@
extern char _exiprom[];
+extern char __idmap_text_start[];
+extern char __idmap_text_end[];
+extern char __entry_text_start[];
+extern char __entry_text_end[];
+extern char __hyp_idmap_text_start[];
+extern char __hyp_idmap_text_end[];
+
+static inline bool in_entry_text(unsigned long addr)
+{
+ return memory_contains(__entry_text_start, __entry_text_end,
+ (void *)addr, 1);
+}
+
+static inline bool in_idmap_text(unsigned long addr)
+{
+ void *a = (void *)addr;
+ return memory_contains(__idmap_text_start, __idmap_text_end, a, 1) ||
+ memory_contains(__hyp_idmap_text_start, __hyp_idmap_text_end,
+ a, 1);
+}
+
#endif /* _ASM_ARM_SECTIONS_H */
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index f54a3136aac6..111a1d8a41dd 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -39,18 +39,4 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
return __memset64(p, v, n * 8, v >> 32);
}
-extern void __memzero(void *ptr, __kernel_size_t n);
-
-#define memset(p,v,n) \
- ({ \
- void *__p = (p); size_t __n = n; \
- if ((__n) != 0) { \
- if (__builtin_constant_p((v)) && (v) == 0) \
- __memzero((__p),(__n)); \
- else \
- memset((__p),(v),(__n)); \
- } \
- (__p); \
- })
-
#endif
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f9a6c5fc3fd1..a00288d75ee6 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -28,18 +28,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
ptr < (unsigned long)&__irqentry_text_end;
}
-static inline int in_exception_text(unsigned long ptr)
-{
- extern char __exception_text_start[];
- extern char __exception_text_end[];
- int in;
-
- in = ptr >= (unsigned long)&__exception_text_start &&
- ptr < (unsigned long)&__exception_text_end;
-
- return in ? : __in_irqentry_text(ptr);
-}
-
extern void __init early_trap_init(void *);
extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame);
extern void ptrace_break(struct task_struct *tsk, struct pt_regs *regs);
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index a91ae499614c..2c3b952be63e 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -20,8 +20,10 @@
#ifndef __ASM_UNIFIED_H
#define __ASM_UNIFIED_H
-#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED)
+#if defined(__ASSEMBLY__)
.syntax unified
+#else
+__asm__(".syntax unified");
#endif
#ifdef CONFIG_CPU_V7M
@@ -64,77 +66,4 @@
#endif /* CONFIG_THUMB2_KERNEL */
-#ifndef CONFIG_ARM_ASM_UNIFIED
-
-/*
- * If the unified assembly syntax isn't used (in ARM mode), these
- * macros expand to an empty string
- */
-#ifdef __ASSEMBLY__
- .macro it, cond
- .endm
- .macro itt, cond
- .endm
- .macro ite, cond
- .endm
- .macro ittt, cond
- .endm
- .macro itte, cond
- .endm
- .macro itet, cond
- .endm
- .macro itee, cond
- .endm
- .macro itttt, cond
- .endm
- .macro ittte, cond
- .endm
- .macro ittet, cond
- .endm
- .macro ittee, cond
- .endm
- .macro itett, cond
- .endm
- .macro itete, cond
- .endm
- .macro iteet, cond
- .endm
- .macro iteee, cond
- .endm
-#else /* !__ASSEMBLY__ */
-__asm__(
-" .macro it, cond\n"
-" .endm\n"
-" .macro itt, cond\n"
-" .endm\n"
-" .macro ite, cond\n"
-" .endm\n"
-" .macro ittt, cond\n"
-" .endm\n"
-" .macro itte, cond\n"
-" .endm\n"
-" .macro itet, cond\n"
-" .endm\n"
-" .macro itee, cond\n"
-" .endm\n"
-" .macro itttt, cond\n"
-" .endm\n"
-" .macro ittte, cond\n"
-" .endm\n"
-" .macro ittet, cond\n"
-" .endm\n"
-" .macro ittee, cond\n"
-" .endm\n"
-" .macro itett, cond\n"
-" .endm\n"
-" .macro itete, cond\n"
-" .endm\n"
-" .macro iteet, cond\n"
-" .endm\n"
-" .macro iteee, cond\n"
-" .endm\n");
-#endif /* __ASSEMBLY__ */
-
-#endif /* CONFIG_ARM_ASM_UNIFIED */
-
#endif /* !__ASM_UNIFIED_H */
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 5266fd9ad6b4..783fbb4de5f9 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -92,7 +92,6 @@ EXPORT_SYMBOL(__memset64);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
-EXPORT_SYMBOL(__memzero);
EXPORT_SYMBOL(mmioset);
EXPORT_SYMBOL(mmiocpy);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index fbc707626b3e..1752033b0070 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -82,11 +82,7 @@
#endif
.endm
-#ifdef CONFIG_KPROBES
- .section .kprobes.text,"ax",%progbits
-#else
- .text
-#endif
+ .section .entry.text,"ax",%progbits
/*
* Invalid mode handlers
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index e655dcd0a933..3c4f88701f22 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -37,6 +37,7 @@ saved_pc .req lr
#define TRACE(x...)
#endif
+ .section .entry.text,"ax",%progbits
.align 5
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
/*
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 21dde771a7dd..6e0375e7db05 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -105,8 +105,9 @@ __mmap_switched:
ARM( ldmia r4!, {r0, r1, sp} )
THUMB( ldmia r4!, {r0, r1, r3} )
THUMB( mov sp, r3 )
- sub r1, r1, r0
- bl __memzero @ clear .bss
+ sub r2, r1, r0
+ mov r1, #0
+ bl memset @ clear .bss
ldmia r4, {r0, r1, r2, r3}
str r9, [r0] @ Save processor ID
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index af2a7f1e3103..629e25152c0d 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -44,17 +44,17 @@ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
/* Number of BRP/WRP registers on this CPU. */
-static int core_num_brps;
-static int core_num_wrps;
+static int core_num_brps __ro_after_init;
+static int core_num_wrps __ro_after_init;
/* Debug architecture version. */
-static u8 debug_arch;
+static u8 debug_arch __ro_after_init;
/* Does debug architecture support OS Save and Restore? */
-static bool has_ossr;
+static bool has_ossr __ro_after_init;
/* Maximum supported watchpoint length. */
-static u8 max_watchpoint_len;
+static u8 max_watchpoint_len __ro_after_init;
#define READ_WB_REG_CASE(OP2, M, VAL) \
case ((OP2 << 4) + M): \
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index b4fbf00ee4ad..2da087926ebe 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -379,6 +379,9 @@ asmlinkage void secondary_start_kernel(void)
cpu_init();
+#ifndef CONFIG_MMU
+ setup_vectors_base();
+#endif
pr_debug("CPU%u: Booted secondary processor\n", cpu);
preempt_disable();
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index 65228bf4c6df..a56e7c856ab5 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -3,6 +3,7 @@
#include <linux/sched/debug.h>
#include <linux/stacktrace.h>
+#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/traps.h>
@@ -63,7 +64,6 @@ EXPORT_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE
struct stack_trace_data {
struct stack_trace *trace;
- unsigned long last_pc;
unsigned int no_sched_functions;
unsigned int skip;
};
@@ -87,16 +87,7 @@ static int save_trace(struct stackframe *frame, void *d)
if (trace->nr_entries >= trace->max_entries)
return 1;
- /*
- * in_exception_text() is designed to test if the PC is one of
- * the functions which has an exception stack above it, but
- * unfortunately what is in frame->pc is the return LR value,
- * not the saved PC value. So, we need to track the previous
- * frame PC value when doing this.
- */
- addr = data->last_pc;
- data->last_pc = frame->pc;
- if (!in_exception_text(addr))
+ if (!in_entry_text(frame->pc))
return 0;
regs = (struct pt_regs *)frame->sp;
@@ -114,7 +105,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
struct stackframe frame;
data.trace = trace;
- data.last_pc = ULONG_MAX;
data.skip = trace->skip;
data.no_sched_functions = nosched;
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3e26c6f7a191..5e3633c24e63 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -72,7 +72,7 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long
printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from);
#endif
- if (in_exception_text(where))
+ if (in_entry_text(from))
dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs));
}
@@ -433,7 +433,7 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
return fn ? fn(regs, instr) : 1;
}
-asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+asmlinkage void do_undefinstr(struct pt_regs *regs)
{
unsigned int instr;
siginfo_t info;
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index ec4b3f94ad80..12b87591eb7c 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -96,9 +96,9 @@ SECTIONS
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
IDMAP_TEXT
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
+ __entry_text_start = .;
+ *(.entry.text)
+ __entry_text_end = .;
IRQENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index ee53f6518872..84a1ae3ce46e 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -105,9 +105,9 @@ SECTIONS
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
IDMAP_TEXT
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
+ __entry_text_start = .;
+ *(.entry.text)
+ __entry_text_end = .;
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
TEXT_TEXT
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 4cb0b9624d8f..ad25fd1872c7 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -8,7 +8,7 @@
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
delay.o delay-loop.o findbit.o memchr.o memcpy.o \
- memmove.o memset.o memzero.o setbit.o \
+ memmove.o memset.o setbit.o \
strchr.o strrchr.o \
testchangebit.o testclearbit.o testsetbit.o \
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
deleted file mode 100644
index 0eded952e089..000000000000
--- a/arch/arm/lib/memzero.S
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * linux/arch/arm/lib/memzero.S
- *
- * Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/unwind.h>
-
- .text
- .align 5
- .word 0
-/*
- * Align the pointer in r0. r3 contains the number of bytes that we are
- * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we
- * don't bother; we use byte stores instead.
- */
-UNWIND( .fnstart )
-1: subs r1, r1, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r2, [r0], #1 @ 1
- strleb r2, [r0], #1 @ 1
- strb r2, [r0], #1 @ 1
- add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memzero again.
- */
-
-ENTRY(__memzero)
- mov r2, #0 @ 1
- ands r3, r0, #3 @ 1 unaligned?
- bne 1b @ 1
-/*
- * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
- */
- cmp r1, #16 @ 1 we can skip this chunk if we
- blt 4f @ 1 have < 16 bytes
-
-#if ! CALGN(1)+0
-
-/*
- * We need an extra register for this loop - save the return address and
- * use the LR
- */
- str lr, [sp, #-4]! @ 1
-UNWIND( .fnend )
-UNWIND( .fnstart )
-UNWIND( .save {lr} )
- mov ip, r2 @ 1
- mov lr, r2 @ 1
-
-3: subs r1, r1, #64 @ 1 write 32 bytes out per loop
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- bgt 3b @ 1
- ldmeqfd sp!, {pc} @ 1/2 quick exit
-/*
- * No need to correct the count; we're only testing bits from now on
- */
- tst r1, #32 @ 1
- stmneia r0!, {r2, r3, ip, lr} @ 4
- stmneia r0!, {r2, r3, ip, lr} @ 4
- tst r1, #16 @ 1 16 bytes or more?
- stmneia r0!, {r2, r3, ip, lr} @ 4
- ldr lr, [sp], #4 @ 1
-UNWIND( .fnend )
-
-#else
-
-/*
- * This version aligns the destination pointer in order to write
- * whole cache lines at once.
- */
-
- stmfd sp!, {r4-r7, lr}
-UNWIND( .fnend )
-UNWIND( .fnstart )
-UNWIND( .save {r4-r7, lr} )
- mov r4, r2
- mov r5, r2
- mov r6, r2
- mov r7, r2
- mov ip, r2
- mov lr, r2
-
- cmp r1, #96
- andgts ip, r0, #31
- ble 3f
-
- rsb ip, ip, #32
- sub r1, r1, ip
- movs ip, ip, lsl #(32 - 4)
- stmcsia r0!, {r4, r5, r6, r7}
- stmmiia r0!, {r4, r5}
- movs ip, ip, lsl #2
- strcs r2, [r0], #4
-
-3: subs r1, r1, #64
- stmgeia r0!, {r2-r7, ip, lr}
- stmgeia r0!, {r2-r7, ip, lr}
- bgt 3b
- ldmeqfd sp!, {r4-r7, pc}
-
- tst r1, #32
- stmneia r0!, {r2-r7, ip, lr}
- tst r1, #16
- stmneia r0!, {r4-r7}
- ldmfd sp!, {r4-r7, lr}
-UNWIND( .fnend )
-
-#endif
-
-UNWIND( .fnstart )
-4: tst r1, #8 @ 1 8 bytes or more?
- stmneia r0!, {r2, r3} @ 2
- tst r1, #4 @ 1 4 bytes or more?
- strne r2, [r0], #4 @ 1
-/*
- * When we get here, we've got less than 4 bytes to zero. We
- * may have an unaligned pointer as well.
- */
-5: tst r1, #2 @ 1 2 bytes or more?
- strneb r2, [r0], #1 @ 1
- strneb r2, [r0], #1 @ 1
- tst r1, #1 @ 1 a byte left over
- strneb r2, [r0], #1 @ 1
- ret lr @ 1
-UNWIND( .fnend )
-ENDPROC(__memzero)
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index 62e7bc3018f0..e457f299cd44 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -17,6 +17,7 @@
#include <linux/mtd/rawnand.h>
#include <linux/i2c.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/clk.h>
#include <linux/videodev2.h>
#include <media/i2c/tvp514x.h>
@@ -108,11 +109,20 @@ static struct platform_device davinci_nand_device = {
},
};
+static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
+ .dev_id = "i2c_davinci",
+ .table = {
+ GPIO_LOOKUP("davinci_gpio", 15, "sda",
+ GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
+ GPIO_LOOKUP("davinci_gpio", 14, "scl",
+ GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
+ },
+};
+
static struct davinci_i2c_platform_data i2c_pdata = {
.bus_freq = 400 /* kHz */,
.bus_delay = 0 /* usec */,
- .sda_pin = 15,
- .scl_pin = 14,
+ .gpio_recovery = true,
};
static int dm355evm_mmc_gpios = -EINVAL;
@@ -141,6 +151,7 @@ static struct i2c_board_info dm355evm_i2c_info[] = {
static void __init evm_init_i2c(void)
{
+ gpiod_add_lookup_table(&i2c_recovery_gpiod_table);
davinci_init_i2c(&i2c_pdata);
gpio_request(5, "dm355evm_msp");
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index b07c9b18d427..85e6fb33b1ee 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -13,6 +13,7 @@
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/i2c.h>
#include <linux/platform_data/pcf857x.h>
#include <linux/platform_data/at24.h>
@@ -595,18 +596,28 @@ static struct i2c_board_info __initdata i2c_info[] = {
},
};
+static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
+ .dev_id = "i2c_davinci",
+ .table = {
+ GPIO_LOOKUP("davinci_gpio", 44, "sda",
+ GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
+ GPIO_LOOKUP("davinci_gpio", 43, "scl",
+ GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
+ },
+};
+
/* The msp430 uses a slow bitbanged I2C implementation (ergo 20 KHz),
* which requires 100 usec of idle bus after i2c writes sent to it.
*/
static struct davinci_i2c_platform_data i2c_pdata = {
.bus_freq = 20 /* kHz */,
.bus_delay = 100 /* usec */,
- .sda_pin = 44,
- .scl_pin = 43,
+ .gpio_recovery = true,
};
static void __init evm_init_i2c(void)
{
+ gpiod_add_lookup_table(&i2c_recovery_gpiod_table);
davinci_init_i2c(&i2c_pdata);
i2c_add_driver(&dm6446evm_msp_driver);
i2c_register_board_info(1, i2c_info, ARRAY_SIZE(i2c_info));
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 36e3c79f4973..07df3a59b13f 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -5,6 +5,7 @@ menu "SA11x0 Implementations"
config SA1100_ASSABET
bool "Assabet"
select ARM_SA1110_CPUFREQ
+ select GPIO_REG
help
Say Y here if you are using the Intel(R) StrongARM(R) SA-1110
Microprocessor Development Board (also known as the Assabet).
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d28ecb9ef172..f68241d995f2 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
+#include <linux/gpio/gpio-reg.h>
#include <linux/ioport.h>
#include <linux/platform_data/sa11x0-serial.h>
#include <linux/serial_core.h>
@@ -61,20 +62,45 @@
unsigned long SCR_value = ASSABET_SCR_INIT;
EXPORT_SYMBOL(SCR_value);
-static unsigned long BCR_value = ASSABET_BCR_DB1110;
+static struct gpio_chip *assabet_bcr_gc;
+
+static const char *assabet_names[] = {
+ "cf_pwr", "cf_gfx_reset", "nsoft_reset", "irda_fsel",
+ "irda_md0", "irda_md1", "stereo_loopback", "ncf_bus_on",
+ "audio_pwr_on", "light_pwr_on", "lcd16data", "lcd_pwr_on",
+ "rs232_on", "nred_led", "ngreen_led", "vib_on",
+ "com_dtr", "com_rts", "radio_wake_mod", "i2c_enab",
+ "tvir_enab", "qmute", "radio_pwr_on", "spkr_off",
+ "rs232_valid", "com_dcd", "com_cts", "com_dsr",
+ "radio_cts", "radio_dsr", "radio_dcd", "radio_ri",
+};
+/* The old deprecated interface */
void ASSABET_BCR_frob(unsigned int mask, unsigned int val)
{
- unsigned long flags;
+ unsigned long m = mask, v = val;
- local_irq_save(flags);
- BCR_value = (BCR_value & ~mask) | val;
- ASSABET_BCR = BCR_value;
- local_irq_restore(flags);
+ assabet_bcr_gc->set_multiple(assabet_bcr_gc, &m, &v);
}
-
EXPORT_SYMBOL(ASSABET_BCR_frob);
+static int __init assabet_init_gpio(void __iomem *reg, u32 def_val)
+{
+ struct gpio_chip *gc;
+
+ writel_relaxed(def_val, reg);
+
+ gc = gpio_reg_init(NULL, reg, -1, 32, "assabet", 0xff000000, def_val,
+ assabet_names, NULL, NULL);
+
+ if (IS_ERR(gc))
+ return PTR_ERR(gc);
+
+ assabet_bcr_gc = gc;
+
+ return gc->base;
+}
+
/*
* The codec reset goes to three devices, so we need to release
* the rest when any one of these requests it. However, that
@@ -146,7 +172,7 @@ static void adv7171_write(unsigned reg, unsigned val)
unsigned gpdr = GPDR;
unsigned gplr = GPLR;
- ASSABET_BCR = BCR_value | ASSABET_BCR_AUDIO_ON;
+ ASSABET_BCR_frob(ASSABET_BCR_AUDIO_ON, ASSABET_BCR_AUDIO_ON);
udelay(100);
GPCR = SDA | SCK | MOD; /* clear L3 mode to ensure UDA1341 doesn't respond */
@@ -457,14 +483,6 @@ static void __init assabet_init(void)
sa11x0_ppc_configure_mcp();
if (machine_has_neponset()) {
- /*
- * Angel sets this, but other bootloaders may not.
- *
- * This must precede any driver calls to BCR_set()
- * or BCR_clear().
- */
- ASSABET_BCR = BCR_value = ASSABET_BCR_DB1111;
-
#ifndef CONFIG_ASSABET_NEPONSET
printk( "Warning: Neponset detected but full support "
"hasn't been configured in the kernel\n" );
@@ -748,12 +766,31 @@ static int __init assabet_leds_init(void)
fs_initcall(assabet_leds_init);
#endif
+void __init assabet_init_irq(void)
+{
+ u32 def_val;
+
+ sa1100_init_irq();
+
+ if (machine_has_neponset())
+ def_val = ASSABET_BCR_DB1111;
+ else
+ def_val = ASSABET_BCR_DB1110;
+
+ /*
+ * Angel sets this, but other bootloaders may not.
+ *
+ * This must precede any driver calls to BCR_set() or BCR_clear().
+ */
+ assabet_init_gpio((void *)&ASSABET_BCR, def_val);
+}
+
MACHINE_START(ASSABET, "Intel-Assabet")
.atag_offset = 0x100,
.fixup = fixup_assabet,
.map_io = assabet_map_io,
.nr_irqs = SA1100_NR_IRQS,
- .init_irq = sa1100_init_irq,
+ .init_irq = assabet_init_irq,
.init_time = sa1100_timer_init,
.init_machine = assabet_init,
.init_late = sa11x0_init_late,
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index a61a2432766b..b1823f445358 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -3,6 +3,8 @@
* linux/arch/arm/mach-sa1100/neponset.c
*/
#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/gpio/gpio-reg.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/irq.h>
@@ -45,10 +47,13 @@
#define IRR_USAR (1 << 1)
#define IRR_SA1111 (1 << 2)
+#define NCR_NGPIO 7
+
#define MDM_CTL0_RTS1 (1 << 0)
#define MDM_CTL0_DTR1 (1 << 1)
#define MDM_CTL0_RTS2 (1 << 2)
#define MDM_CTL0_DTR2 (1 << 3)
+#define MDM_CTL0_NGPIO 4
#define MDM_CTL1_CTS1 (1 << 0)
#define MDM_CTL1_DSR1 (1 << 1)
@@ -56,80 +61,87 @@
#define MDM_CTL1_CTS2 (1 << 3)
#define MDM_CTL1_DSR2 (1 << 4)
#define MDM_CTL1_DCD2 (1 << 5)
+#define MDM_CTL1_NGPIO 6
#define AUD_SEL_1341 (1 << 0)
#define AUD_MUTE_1341 (1 << 1)
+#define AUD_NGPIO 2
extern void sa1110_mb_disable(void);
+#define to_neponset_gpio_chip(x) container_of(x, struct neponset_gpio_chip, gc)
+
+static const char *neponset_ncr_names[] = {
+ "gp01_off", "tp_power", "ms_power", "enet_osc",
+ "spi_kb_wk_up", "a0vpp", "a1vpp"
+};
+
+static const char *neponset_mdmctl0_names[] = {
+ "rts3", "dtr3", "rts1", "dtr1",
+};
+
+static const char *neponset_mdmctl1_names[] = {
+ "cts3", "dsr3", "dcd3", "cts1", "dsr1", "dcd1"
+};
+
+static const char *neponset_aud_names[] = {
+ "sel_1341", "mute_1341",
+};
+
struct neponset_drvdata {
void __iomem *base;
struct platform_device *sa1111;
struct platform_device *smc91x;
unsigned irq_base;
-#ifdef CONFIG_PM_SLEEP
- u32 ncr0;
- u32 mdm_ctl_0;
-#endif
+ struct gpio_chip *gpio[4];
};
-static void __iomem *nep_base;
+static struct neponset_drvdata *nep;
void neponset_ncr_frob(unsigned int mask, unsigned int val)
{
- void __iomem *base = nep_base;
-
- if (base) {
- unsigned long flags;
- unsigned v;
-
- local_irq_save(flags);
- v = readb_relaxed(base + NCR_0);
- writeb_relaxed((v & ~mask) | val, base + NCR_0);
- local_irq_restore(flags);
- } else {
- WARN(1, "nep_base unset\n");
- }
+ struct neponset_drvdata *n = nep;
+ unsigned long m = mask, v = val;
+
+ if (nep)
+ n->gpio[0]->set_multiple(n->gpio[0], &m, &v);
+ else
+ WARN(1, "nep unset\n");
}
EXPORT_SYMBOL(neponset_ncr_frob);
static void neponset_set_mctrl(struct uart_port *port, u_int mctrl)
{
- void __iomem *base = nep_base;
- u_int mdm_ctl0;
+ struct neponset_drvdata *n = nep;
+ unsigned long mask, val = 0;
- if (!base)
+ if (!n)
return;
- mdm_ctl0 = readb_relaxed(base + MDM_CTL_0);
if (port->mapbase == _Ser1UTCR0) {
- if (mctrl & TIOCM_RTS)
- mdm_ctl0 &= ~MDM_CTL0_RTS2;
- else
- mdm_ctl0 |= MDM_CTL0_RTS2;
-
- if (mctrl & TIOCM_DTR)
- mdm_ctl0 &= ~MDM_CTL0_DTR2;
- else
- mdm_ctl0 |= MDM_CTL0_DTR2;
+ mask = MDM_CTL0_RTS2 | MDM_CTL0_DTR2;
+
+ if (!(mctrl & TIOCM_RTS))
+ val |= MDM_CTL0_RTS2;
+
+ if (!(mctrl & TIOCM_DTR))
+ val |= MDM_CTL0_DTR2;
} else if (port->mapbase == _Ser3UTCR0) {
- if (mctrl & TIOCM_RTS)
- mdm_ctl0 &= ~MDM_CTL0_RTS1;
- else
- mdm_ctl0 |= MDM_CTL0_RTS1;
-
- if (mctrl & TIOCM_DTR)
- mdm_ctl0 &= ~MDM_CTL0_DTR1;
- else
- mdm_ctl0 |= MDM_CTL0_DTR1;
+ mask = MDM_CTL0_RTS1 | MDM_CTL0_DTR1;
+
+ if (!(mctrl & TIOCM_RTS))
+ val |= MDM_CTL0_RTS1;
+
+ if (!(mctrl & TIOCM_DTR))
+ val |= MDM_CTL0_DTR1;
}
- writeb_relaxed(mdm_ctl0, base + MDM_CTL_0);
+ n->gpio[1]->set_multiple(n->gpio[1], &mask, &val);
}
static u_int neponset_get_mctrl(struct uart_port *port)
{
- void __iomem *base = nep_base;
+ void __iomem *base = nep->base;
u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR;
u_int mdm_ctl1;
@@ -231,6 +243,22 @@ static struct irq_chip nochip = {
.irq_unmask = nochip_noop,
};
+static int neponset_init_gpio(struct gpio_chip **gcp,
+ struct device *dev, const char *label, void __iomem *reg,
+ unsigned num, bool in, const char *const * names)
+{
+ struct gpio_chip *gc;
+
+ gc = gpio_reg_init(dev, reg, -1, num, label, in ? 0xffffffff : 0,
+ readl_relaxed(reg), names, NULL, NULL);
+ if (IS_ERR(gc))
+ return PTR_ERR(gc);
+
+ *gcp = gc;
+
+ return 0;
+}
+
static struct sa1111_platform_data sa1111_info = {
.disable_devs = SA1111_DEVID_PS2_MSE,
};
@@ -274,7 +302,7 @@ static int neponset_probe(struct platform_device *dev)
};
int ret, irq;
- if (nep_base)
+ if (nep)
return -EBUSY;
irq = ret = platform_get_irq(dev, 0);
@@ -330,6 +358,22 @@ static int neponset_probe(struct platform_device *dev)
irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
irq_set_chained_handler_and_data(irq, neponset_irq_handler, d);
+ /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */
+ writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0);
+
+ neponset_init_gpio(&d->gpio[0], &dev->dev, "neponset-ncr",
+ d->base + NCR_0, NCR_NGPIO, false,
+ neponset_ncr_names);
+ neponset_init_gpio(&d->gpio[1], &dev->dev, "neponset-mdm-ctl0",
+ d->base + MDM_CTL_0, MDM_CTL0_NGPIO, false,
+ neponset_mdmctl0_names);
+ neponset_init_gpio(&d->gpio[2], &dev->dev, "neponset-mdm-ctl1",
+ d->base + MDM_CTL_1, MDM_CTL1_NGPIO, true,
+ neponset_mdmctl1_names);
+ neponset_init_gpio(&d->gpio[3], &dev->dev, "neponset-aud-ctl",
+ d->base + AUD_CTL, AUD_NGPIO, false,
+ neponset_aud_names);
+
/*
* We would set IRQ_GPIO25 to be a wake-up IRQ, but unfortunately
* something on the Neponset activates this IRQ on sleep (eth?)
@@ -340,16 +384,13 @@ static int neponset_probe(struct platform_device *dev)
dev_info(&dev->dev, "Neponset daughter board, providing IRQ%u-%u\n",
d->irq_base, d->irq_base + NEP_IRQ_NR - 1);
- nep_base = d->base;
+ nep = d;
sa1100_register_uart_fns(&neponset_port_fns);
/* Ensure that the memory bus request/grant signals are setup */
sa1110_mb_disable();
- /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */
- writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0);
-
sa1111_resources[0].parent = sa1111_res;
sa1111_resources[1].start = d->irq_base + NEP_IRQ_SA1111;
sa1111_resources[1].end = d->irq_base + NEP_IRQ_SA1111;
@@ -385,7 +426,7 @@ static int neponset_remove(struct platform_device *dev)
platform_device_unregister(d->smc91x);
irq_set_chained_handler(irq, NULL);
irq_free_descs(d->irq_base, NEP_IRQ_NR);
- nep_base = NULL;
+ nep = NULL;
iounmap(d->base);
kfree(d);
@@ -393,30 +434,22 @@ static int neponset_remove(struct platform_device *dev)
}
#ifdef CONFIG_PM_SLEEP
-static int neponset_suspend(struct device *dev)
-{
- struct neponset_drvdata *d = dev_get_drvdata(dev);
-
- d->ncr0 = readb_relaxed(d->base + NCR_0);
- d->mdm_ctl_0 = readb_relaxed(d->base + MDM_CTL_0);
-
- return 0;
-}
-
static int neponset_resume(struct device *dev)
{
struct neponset_drvdata *d = dev_get_drvdata(dev);
+ int i, ret = 0;
- writeb_relaxed(d->ncr0, d->base + NCR_0);
- writeb_relaxed(d->mdm_ctl_0, d->base + MDM_CTL_0);
+ for (i = 0; i < ARRAY_SIZE(d->gpio); i++) {
+ ret = gpio_reg_resume(d->gpio[i]);
+ if (ret)
+ break;
+ }
- return 0;
+ return ret;
}
static const struct dev_pm_ops neponset_pm_ops = {
- .suspend_noirq = neponset_suspend,
.resume_noirq = neponset_resume,
- .freeze_noirq = neponset_suspend,
.restore_noirq = neponset_resume,
};
#define PM_OPS &neponset_pm_ops
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index fd9077a74fce..7f14acf67caf 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -909,6 +909,14 @@ config OUTER_CACHE_SYNC
The outer cache has a outer_cache_fns.sync function pointer
that can be used to drain the write buffer of the outer cache.
+config CACHE_B15_RAC
+ bool "Enable the Broadcom Brahma-B15 read-ahead cache controller"
+ depends on ARCH_BRCMSTB
+ default y
+ help
+ This option enables the Broadcom Brahma-B15 read-ahead cache
+ controller. If disabled, the read-ahead cache remains off.
+
config CACHE_FEROCEON_L2
bool "Enable the Feroceon L2 cache controller"
depends on ARCH_MV78XX0 || ARCH_MVEBU
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 01bcc33f59e3..9dbb84923e12 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -13,7 +13,8 @@ obj-y += nommu.o
obj-$(CONFIG_ARM_MPU) += pmsa-v7.o
endif
-obj-$(CONFIG_ARM_PTDUMP) += dump.o
+obj-$(CONFIG_ARM_PTDUMP_CORE) += dump.o
+obj-$(CONFIG_ARM_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
@@ -103,6 +104,7 @@ AFLAGS_proc-v6.o :=-Wa,-march=armv6
AFLAGS_proc-v7.o :=-Wa,-march=armv7-a
obj-$(CONFIG_OUTER_CACHE) += l2c-common.o
+obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o
obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o
obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o
obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o
diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c
new file mode 100644
index 000000000000..d9586ba2e63c
--- /dev/null
+++ b/arch/arm/mm/cache-b15-rac.c
@@ -0,0 +1,356 @@
+/*
+ * Broadcom Brahma-B15 CPU read-ahead cache management functions
+ *
+ * Copyright (C) 2015-2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/of_address.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/syscore_ops.h>
+#include <linux/reboot.h>
+
+#include <asm/cacheflush.h>
+#include <asm/hardware/cache-b15-rac.h>
+
+extern void v7_flush_kern_cache_all(void);
+
+/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */
+#define RAC_CONFIG0_REG (0x78)
+#define RACENPREF_MASK (0x3)
+#define RACPREFINST_SHIFT (0)
+#define RACENINST_SHIFT (2)
+#define RACPREFDATA_SHIFT (4)
+#define RACENDATA_SHIFT (6)
+#define RAC_CPU_SHIFT (8)
+#define RACCFG_MASK (0xff)
+#define RAC_CONFIG1_REG (0x7c)
+#define RAC_FLUSH_REG (0x80)
+#define FLUSH_RAC (1 << 0)
+
+/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */
+#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \
+ RACENPREF_MASK << RACENINST_SHIFT | \
+ 1 << RACPREFDATA_SHIFT | \
+ RACENPREF_MASK << RACENDATA_SHIFT)
+
+#define RAC_ENABLED 0
+/* Special state where we want to bypass the spinlock and call directly
+ * into the v7 cache maintenance operations during suspend/resume
+ */
+#define RAC_SUSPENDED 1
+
+static void __iomem *b15_rac_base;
+static DEFINE_SPINLOCK(rac_lock);
+
+static u32 rac_config0_reg;
+
+/* Initialization flag to avoid checking for b15_rac_base, and to prevent
+ * multi-platform kernels from crashing here as well.
+ */
+static unsigned long b15_rac_flags;
+
+static inline u32 __b15_rac_disable(void)
+{
+ u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
+ __raw_writel(0, b15_rac_base + RAC_CONFIG0_REG);
+ dmb();
+ return val;
+}
+
+static inline void __b15_rac_flush(void)
+{
+ u32 reg;
+
+ __raw_writel(FLUSH_RAC, b15_rac_base + RAC_FLUSH_REG);
+ do {
+ /* This dmb() is required to force the Bus Interface Unit
+ * to clean oustanding writes, and forces an idle cycle
+ * to be inserted.
+ */
+ dmb();
+ reg = __raw_readl(b15_rac_base + RAC_FLUSH_REG);
+ } while (reg & FLUSH_RAC);
+}
+
+static inline u32 b15_rac_disable_and_flush(void)
+{
+ u32 reg;
+
+ reg = __b15_rac_disable();
+ __b15_rac_flush();
+ return reg;
+}
+
+static inline void __b15_rac_enable(u32 val)
+{
+ __raw_writel(val, b15_rac_base + RAC_CONFIG0_REG);
+ /* dsb() is required here to be consistent with __flush_icache_all() */
+ dsb();
+}
+
+#define BUILD_RAC_CACHE_OP(name, bar) \
+void b15_flush_##name(void) \
+{ \
+ unsigned int do_flush; \
+ u32 val = 0; \
+ \
+ if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) { \
+ v7_flush_##name(); \
+ bar; \
+ return; \
+ } \
+ \
+ spin_lock(&rac_lock); \
+ do_flush = test_bit(RAC_ENABLED, &b15_rac_flags); \
+ if (do_flush) \
+ val = b15_rac_disable_and_flush(); \
+ v7_flush_##name(); \
+ if (!do_flush) \
+ bar; \
+ else \
+ __b15_rac_enable(val); \
+ spin_unlock(&rac_lock); \
+}
+
+#define nobarrier
+
+/* The readahead cache present in the Brahma-B15 CPU is a special piece of
+ * hardware after the integrated L2 cache of the B15 CPU complex whose purpose
+ * is to prefetch instruction and/or data with a line size of either 64 bytes
+ * or 256 bytes. The rationale is that the data-bus of the CPU interface is
+ * optimized for 256-bytes transactions, and enabling the readahead cache
+ * provides a significant performance boost we want it enabled (typically
+ * twice the performance for a memcpy benchmark application).
+ *
+ * The readahead cache is transparent for Modified Virtual Addresses
+ * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and
+ * DCCIMVAC.
+ *
+ * It is however not transparent for the following cache maintenance
+ * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely
+ * what we are patching here with our BUILD_RAC_CACHE_OP here.
+ */
+BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier);
+
+static void b15_rac_enable(void)
+{
+ unsigned int cpu;
+ u32 enable = 0;
+
+ for_each_possible_cpu(cpu)
+ enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT));
+
+ b15_rac_disable_and_flush();
+ __b15_rac_enable(enable);
+}
+
+static int b15_rac_reboot_notifier(struct notifier_block *nb,
+ unsigned long action,
+ void *data)
+{
+ /* During kexec, we are not yet migrated on the boot CPU, so we need to
+ * make sure we are SMP safe here. Once the RAC is disabled, flag it as
+ * suspended such that the hotplug notifier returns early.
+ */
+ if (action == SYS_RESTART) {
+ spin_lock(&rac_lock);
+ b15_rac_disable_and_flush();
+ clear_bit(RAC_ENABLED, &b15_rac_flags);
+ set_bit(RAC_SUSPENDED, &b15_rac_flags);
+ spin_unlock(&rac_lock);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block b15_rac_reboot_nb = {
+ .notifier_call = b15_rac_reboot_notifier,
+};
+
+/* The CPU hotplug case is the most interesting one, we basically need to make
+ * sure that the RAC is disabled for the entire system prior to having a CPU
+ * die, in particular prior to this dying CPU having exited the coherency
+ * domain.
+ *
+ * Once this CPU is marked dead, we can safely re-enable the RAC for the
+ * remaining CPUs in the system which are still online.
+ *
+ * Offlining a CPU is the problematic case, onlining a CPU is not much of an
+ * issue since the CPU and its cache-level hierarchy will start filling with
+ * the RAC disabled, so L1 and L2 only.
+ *
+ * In this function, we should NOT have to verify any unsafe setting/condition
+ * b15_rac_base:
+ *
+ * It is protected by the RAC_ENABLED flag which is cleared by default, and
+ * being cleared when initial procedure is done. b15_rac_base had been set at
+ * that time.
+ *
+ * RAC_ENABLED:
+ * There is a small timing windows, in b15_rac_init(), between
+ * cpuhp_setup_state_*()
+ * ...
+ * set RAC_ENABLED
+ * However, there is no hotplug activity based on the Linux booting procedure.
+ *
+ * Since we have to disable RAC for all cores, we keep RAC on as long as as
+ * possible (disable it as late as possible) to gain the cache benefit.
+ *
+ * Thus, dying/dead states are chosen here
+ *
+ * We are choosing not do disable the RAC on a per-CPU basis, here, if we did
+ * we would want to consider disabling it as early as possible to benefit the
+ * other active CPUs.
+ */
+
+/* Running on the dying CPU */
+static int b15_rac_dying_cpu(unsigned int cpu)
+{
+ /* During kexec/reboot, the RAC is disabled via the reboot notifier
+ * return early here.
+ */
+ if (test_bit(RAC_SUSPENDED, &b15_rac_flags))
+ return 0;
+
+ spin_lock(&rac_lock);
+
+ /* Indicate that we are starting a hotplug procedure */
+ __clear_bit(RAC_ENABLED, &b15_rac_flags);
+
+ /* Disable the readahead cache and save its value to a global */
+ rac_config0_reg = b15_rac_disable_and_flush();
+
+ spin_unlock(&rac_lock);
+
+ return 0;
+}
+
+/* Running on a non-dying CPU */
+static int b15_rac_dead_cpu(unsigned int cpu)
+{
+ /* During kexec/reboot, the RAC is disabled via the reboot notifier
+ * return early here.
+ */
+ if (test_bit(RAC_SUSPENDED, &b15_rac_flags))
+ return 0;
+
+ spin_lock(&rac_lock);
+
+ /* And enable it */
+ __b15_rac_enable(rac_config0_reg);
+ __set_bit(RAC_ENABLED, &b15_rac_flags);
+
+ spin_unlock(&rac_lock);
+
+ return 0;
+}
+
+static int b15_rac_suspend(void)
+{
+ /* Suspend the read-ahead cache oeprations, forcing our cache
+ * implementation to fallback to the regular ARMv7 calls.
+ *
+ * We are guaranteed to be running on the boot CPU at this point and
+ * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy
+ * here.
+ */
+ rac_config0_reg = b15_rac_disable_and_flush();
+ set_bit(RAC_SUSPENDED, &b15_rac_flags);
+
+ return 0;
+}
+
+static void b15_rac_resume(void)
+{
+ /* Coming out of a S3 suspend/resume cycle, the read-ahead cache
+ * register RAC_CONFIG0_REG will be restored to its default value, make
+ * sure we re-enable it and set the enable flag, we are also guaranteed
+ * to run on the boot CPU, so not racy again.
+ */
+ __b15_rac_enable(rac_config0_reg);
+ clear_bit(RAC_SUSPENDED, &b15_rac_flags);
+}
+
+static struct syscore_ops b15_rac_syscore_ops = {
+ .suspend = b15_rac_suspend,
+ .resume = b15_rac_resume,
+};
+
+static int __init b15_rac_init(void)
+{
+ struct device_node *dn;
+ int ret = 0, cpu;
+ u32 reg, en_mask = 0;
+
+ dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl");
+ if (!dn)
+ return -ENODEV;
+
+ if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n"))
+ goto out;
+
+ b15_rac_base = of_iomap(dn, 0);
+ if (!b15_rac_base) {
+ pr_err("failed to remap BIU control base\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = register_reboot_notifier(&b15_rac_reboot_nb);
+ if (ret) {
+ pr_err("failed to register reboot notifier\n");
+ iounmap(b15_rac_base);
+ goto out;
+ }
+
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
+ "arm/cache-b15-rac:dead",
+ NULL, b15_rac_dead_cpu);
+ if (ret)
+ goto out_unmap;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
+ "arm/cache-b15-rac:dying",
+ NULL, b15_rac_dying_cpu);
+ if (ret)
+ goto out_cpu_dead;
+ }
+
+ if (IS_ENABLED(CONFIG_PM_SLEEP))
+ register_syscore_ops(&b15_rac_syscore_ops);
+
+ spin_lock(&rac_lock);
+ reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
+ for_each_possible_cpu(cpu)
+ en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT));
+ WARN(reg & en_mask, "Read-ahead cache not previously disabled\n");
+
+ b15_rac_enable();
+ set_bit(RAC_ENABLED, &b15_rac_flags);
+ spin_unlock(&rac_lock);
+
+ pr_info("Broadcom Brahma-B15 readahead cache at: 0x%p\n",
+ b15_rac_base + RAC_CONFIG0_REG);
+
+ goto out;
+
+out_cpu_dead:
+ cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING);
+out_unmap:
+ unregister_reboot_notifier(&b15_rac_reboot_nb);
+ iounmap(b15_rac_base);
+out:
+ of_node_put(dn);
+ return ret;
+}
+arch_initcall(b15_rac_init);
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index de78109d002d..215df435bfb9 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -15,6 +15,7 @@
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
+#include <asm/hardware/cache-b15-rac.h>
#include "proc-macros.S"
@@ -446,3 +447,23 @@ ENDPROC(v7_dma_unmap_area)
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions v7
+
+ /* The Broadcom Brahma-B15 read-ahead cache requires some modifications
+ * to the v7_cache_fns, we only override the ones we need
+ */
+#ifndef CONFIG_CACHE_B15_RAC
+ globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all
+#endif
+ globl_equ b15_flush_icache_all, v7_flush_icache_all
+ globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis
+ globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all
+ globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range
+ globl_equ b15_coherent_kern_range, v7_coherent_kern_range
+ globl_equ b15_coherent_user_range, v7_coherent_user_range
+ globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area
+
+ globl_equ b15_dma_map_area, v7_dma_map_area
+ globl_equ b15_dma_unmap_area, v7_dma_unmap_area
+ globl_equ b15_dma_flush_range, v7_dma_flush_range
+
+ define_cache_functions b15
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index fc3b44028cfb..084779c5c893 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -21,11 +21,7 @@
#include <asm/fixmap.h>
#include <asm/memory.h>
#include <asm/pgtable.h>
-
-struct addr_marker {
- unsigned long start_address;
- const char *name;
-};
+#include <asm/ptdump.h>
static struct addr_marker address_markers[] = {
{ MODULES_VADDR, "Modules" },
@@ -38,12 +34,26 @@ static struct addr_marker address_markers[] = {
{ -1, NULL },
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_puts(m, fmt) \
+({ \
+ if (m) \
+ seq_printf(m, fmt); \
+})
+
struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
unsigned long start_address;
unsigned level;
u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
const char *current_domain;
};
@@ -52,6 +62,8 @@ struct prot_bits {
u64 val;
const char *set;
const char *clear;
+ bool ro_bit;
+ bool nx_bit;
};
static const struct prot_bits pte_bits[] = {
@@ -65,11 +77,13 @@ static const struct prot_bits pte_bits[] = {
.val = L_PTE_RDONLY,
.set = "ro",
.clear = "RW",
+ .ro_bit = true,
}, {
.mask = L_PTE_XN,
.val = L_PTE_XN,
.set = "NX",
.clear = "x ",
+ .nx_bit = true,
}, {
.mask = L_PTE_SHARED,
.val = L_PTE_SHARED,
@@ -133,11 +147,13 @@ static const struct prot_bits section_bits[] = {
.val = L_PMD_SECT_RDONLY | PMD_SECT_AP2,
.set = "ro",
.clear = "RW",
+ .ro_bit = true,
#elif __LINUX_ARM_ARCH__ >= 6
{
.mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = PMD_SECT_APX | PMD_SECT_AP_WRITE,
.set = " ro",
+ .ro_bit = true,
}, {
.mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = PMD_SECT_AP_WRITE,
@@ -156,6 +172,7 @@ static const struct prot_bits section_bits[] = {
.mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = 0,
.set = " ro",
+ .ro_bit = true,
}, {
.mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = PMD_SECT_AP_WRITE,
@@ -174,6 +191,7 @@ static const struct prot_bits section_bits[] = {
.val = PMD_SECT_XN,
.set = "NX",
.clear = "x ",
+ .nx_bit = true,
}, {
.mask = PMD_SECT_S,
.val = PMD_SECT_S,
@@ -186,6 +204,8 @@ struct pg_level {
const struct prot_bits *bits;
size_t num;
u64 mask;
+ const struct prot_bits *ro_bit;
+ const struct prot_bits *nx_bit;
};
static struct pg_level pg_level[] = {
@@ -214,10 +234,27 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t
s = bits->clear;
if (s)
- seq_printf(st->seq, " %s", s);
+ pt_dump_seq_printf(st->seq, " %s", s);
}
}
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+ if ((st->current_prot & pg_level[st->level].ro_bit->mask) ==
+ pg_level[st->level].ro_bit->val)
+ return;
+ if ((st->current_prot & pg_level[st->level].nx_bit->mask) ==
+ pg_level[st->level].nx_bit->val)
+ return;
+
+ WARN_ONCE(1, "arm/mm: Found insecure W+X mapping at address %pS\n",
+ (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val, const char *domain)
{
@@ -228,7 +265,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->level = level;
st->current_prot = prot;
st->current_domain = domain;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
} else if (prot != st->current_prot || level != st->level ||
domain != st->current_domain ||
addr >= st->marker[1].start_address) {
@@ -236,7 +273,8 @@ static void note_page(struct pg_state *st, unsigned long addr,
unsigned long delta;
if (st->current_prot) {
- seq_printf(st->seq, "0x%08lx-0x%08lx ",
+ note_prot_wx(st, addr);
+ pt_dump_seq_printf(st->seq, "0x%08lx-0x%08lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
@@ -244,17 +282,19 @@ static void note_page(struct pg_state *st, unsigned long addr,
delta >>= 10;
unit++;
}
- seq_printf(st->seq, "%9lu%c", delta, *unit);
+ pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit);
if (st->current_domain)
- seq_printf(st->seq, " %s", st->current_domain);
+ pt_dump_seq_printf(st->seq, " %s",
+ st->current_domain);
if (pg_level[st->level].bits)
dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num);
- seq_printf(st->seq, "\n");
+ pt_dump_seq_printf(st->seq, "\n");
}
if (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
+ st->marker->name);
}
st->start_address = addr;
st->current_prot = prot;
@@ -335,61 +375,82 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
}
}
-static void walk_pgd(struct seq_file *m)
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
+ unsigned long start)
{
- pgd_t *pgd = swapper_pg_dir;
- struct pg_state st;
- unsigned long addr;
+ pgd_t *pgd = pgd_offset(mm, 0UL);
unsigned i;
-
- memset(&st, 0, sizeof(st));
- st.seq = m;
- st.marker = address_markers;
+ unsigned long addr;
for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
- addr = i * PGDIR_SIZE;
+ addr = start + i * PGDIR_SIZE;
if (!pgd_none(*pgd)) {
- walk_pud(&st, pgd, addr);
+ walk_pud(st, pgd, addr);
} else {
- note_page(&st, addr, 1, pgd_val(*pgd), NULL);
+ note_page(st, addr, 1, pgd_val(*pgd), NULL);
}
}
-
- note_page(&st, 0, 0, 0, NULL);
}
-static int ptdump_show(struct seq_file *m, void *v)
+void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
{
- walk_pgd(m);
- return 0;
-}
+ struct pg_state st = {
+ .seq = m,
+ .marker = info->markers,
+ .check_wx = false,
+ };
-static int ptdump_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ptdump_show, NULL);
+ walk_pgd(&st, info->mm, info->base_addr);
+ note_page(&st, 0, 0, 0, NULL);
}
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int ptdump_init(void)
+static void ptdump_initialize(void)
{
- struct dentry *pe;
unsigned i, j;
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
if (pg_level[i].bits)
- for (j = 0; j < pg_level[i].num; j++)
+ for (j = 0; j < pg_level[i].num; j++) {
pg_level[i].mask |= pg_level[i].bits[j].mask;
+ if (pg_level[i].bits[j].ro_bit)
+ pg_level[i].ro_bit = &pg_level[i].bits[j];
+ if (pg_level[i].bits[j].nx_bit)
+ pg_level[i].nx_bit = &pg_level[i].bits[j];
+ }
address_markers[2].start_address = VMALLOC_START;
+}
+
+static struct ptdump_info kernel_ptdump_info = {
+ .mm = &init_mm,
+ .markers = address_markers,
+ .base_addr = 0,
+};
- pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
- &ptdump_fops);
- return pe ? 0 : -ENOMEM;
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = (struct addr_marker[]) {
+ { 0, NULL},
+ { -1, NULL},
+ },
+ .check_wx = true,
+ };
+
+ walk_pgd(&st, &init_mm, 0);
+ note_page(&st, 0, 0, 0, NULL);
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
+ st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+
+static int ptdump_init(void)
+{
+ ptdump_initialize();
+ return ptdump_debugfs_register(&kernel_ptdump_info,
+ "kernel_page_tables");
}
__initcall(ptdump_init);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 42f585379e19..b75eada23d0a 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -21,7 +21,6 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
-#include <asm/exception.h>
#include <asm/pgtable.h>
#include <asm/system_misc.h>
#include <asm/system_info.h>
@@ -545,7 +544,7 @@ hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *)
/*
* Dispatch a data abort to the relevant handler.
*/
-asmlinkage void __exception
+asmlinkage void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
@@ -578,7 +577,7 @@ hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *
ifsr_info[nr].name = name;
}
-asmlinkage void __exception
+asmlinkage void
do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
{
const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 10bfba85eb96..1d1edd064199 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -16,8 +16,8 @@
* are not supported on any CPU using the idmap tables as its current
* page tables.
*/
-pgd_t *idmap_pgd;
-long long arch_phys_to_idmap_offset;
+pgd_t *idmap_pgd __ro_after_init;
+long long arch_phys_to_idmap_offset __ro_after_init;
#ifdef CONFIG_ARM_LPAE
static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index a1f11a7ee81b..bd6f4513539a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -36,6 +36,7 @@
#include <asm/system_info.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
+#include <asm/ptdump.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
@@ -738,6 +739,7 @@ static int __mark_rodata_ro(void *unused)
void mark_rodata_ro(void)
{
stop_machine(__mark_rodata_ro, NULL, NULL);
+ debug_checkwx();
}
void set_kernel_text_rw(void)
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index e4370810f4f1..7c087961b7ce 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -31,7 +31,7 @@ struct mpu_rgn_info mpu_rgn_info;
#ifdef CONFIG_CPU_CP15
#ifdef CONFIG_CPU_HIGH_VECTOR
-static unsigned long __init setup_vectors_base(void)
+unsigned long setup_vectors_base(void)
{
unsigned long reg = get_cr();
@@ -57,7 +57,7 @@ static inline bool security_extensions_enabled(void)
return 0;
}
-static unsigned long __init setup_vectors_base(void)
+unsigned long setup_vectors_base(void)
{
unsigned long base = 0, reg = get_cr();
diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c
index 976df60ac426..e2853bfff74e 100644
--- a/arch/arm/mm/pmsa-v7.c
+++ b/arch/arm/mm/pmsa-v7.c
@@ -6,6 +6,7 @@
#include <linux/bitops.h>
#include <linux/memblock.h>
+#include <linux/string.h>
#include <asm/cacheflush.h>
#include <asm/cp15.h>
@@ -296,6 +297,7 @@ void __init adjust_lowmem_bounds_mpu(void)
}
}
+ memset(mem, 0, sizeof(mem));
num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem);
for (i = 0; i < num; i++) {
@@ -433,7 +435,7 @@ void __init mpu_setup(void)
/* Background */
err |= mpu_setup_region(region++, 0, 32,
- MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA,
+ MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0RW,
0, false);
#ifdef CONFIG_XIP_KERNEL
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 01d64c0b2563..d55d493f9a1e 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -567,7 +567,7 @@ __v7_setup_stack:
/*
* Standard v7 proc info content
*/
-.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions
+.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions, cache_fns = v7_cache_fns
ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)
ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
@@ -583,7 +583,7 @@ __v7_setup_stack:
.long \proc_fns
.long v7wbi_tlb_fns
.long v6_user_fns
- .long v7_cache_fns
+ .long \cache_fns
.endm
#ifndef CONFIG_ARM_LPAE
@@ -678,7 +678,7 @@ __v7_ca15mp_proc_info:
__v7_b15mp_proc_info:
.long 0x420f00f0
.long 0xff0ffff0
- __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup
+ __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, cache_fns = b15_cache_fns
.size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info
/*
diff --git a/arch/arm/mm/ptdump_debugfs.c b/arch/arm/mm/ptdump_debugfs.c
new file mode 100644
index 000000000000..be8d87be4b93
--- /dev/null
+++ b/arch/arm/mm/ptdump_debugfs.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/ptdump.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct ptdump_info *info = m->private;
+
+ ptdump_walk_pgd(m, info);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ptdump_show, inode->i_private);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+{
+ struct dentry *pe;
+
+ pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
+ return pe ? 0 : -ENOMEM;
+
+}
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 52d1cd14fda4..e90cc8a08186 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -32,6 +32,7 @@
#include <linux/percpu.h>
#include <linux/bug.h>
#include <asm/patch.h>
+#include <asm/sections.h>
#include "../decode-arm.h"
#include "../decode-thumb.h"
@@ -64,9 +65,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
int is;
const struct decode_checker **checkers;
- if (in_exception_text(addr))
- return -EINVAL;
-
#ifdef CONFIG_THUMB2_KERNEL
thumb = true;
addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */
@@ -680,3 +678,13 @@ int __init arch_init_kprobes()
#endif
return 0;
}
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+ void *a = (void *)addr;
+
+ return __in_irqentry_text(addr) ||
+ in_entry_text(addr) ||
+ in_idmap_text(addr) ||
+ memory_contains(__kprobes_text_start, __kprobes_text_end, a, 1);
+}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b2b95f79c746..53612879fe56 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -91,6 +91,7 @@ config ARM64
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_VMAP_STACK
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index cee4ae25a5d1..6db43ebd648d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -113,6 +113,16 @@ struct thread_struct {
struct debug_info debug; /* debugging */
};
+/*
+ * Everything usercopied to/from thread_struct is statically-sized, so
+ * no hardened usercopy whitelist is needed.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = *size = 0;
+}
+
#ifdef CONFIG_COMPAT
#define task_user_tls(t) \
({ \
diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform
index 1b3d8c849101..6996f397c16c 100644
--- a/arch/microblaze/Kconfig.platform
+++ b/arch/microblaze/Kconfig.platform
@@ -8,6 +8,7 @@ menu "Platform options"
config OPT_LIB_FUNCTION
bool "Optimalized lib function"
+ depends on CPU_LITTLE_ENDIAN
default y
help
Allows turn on optimalized library function (memcpy and memmove).
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 830ee7d42fa0..d269dd4b8279 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -36,16 +36,21 @@ endif
CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div
CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift
CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
-CPUFLAGS-$(CONFIG_BIG_ENDIAN) += -mbig-endian
-CPUFLAGS-$(CONFIG_LITTLE_ENDIAN) += -mlittle-endian
+
+ifdef CONFIG_CPU_BIG_ENDIAN
+KBUILD_CFLAGS += -mbig-endian
+KBUILD_AFLAGS += -mbig-endian
+LD += -EB
+else
+KBUILD_CFLAGS += -mlittle-endian
+KBUILD_AFLAGS += -mlittle-endian
+LD += -EL
+endif
CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
# r31 holds current when in kernel mode
-KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
-
-LDFLAGS :=
-LDFLAGS_vmlinux :=
+KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-y) $(CPUFLAGS-1) $(CPUFLAGS-2)
head-y := arch/microblaze/kernel/head.o
libs-y += arch/microblaze/lib/
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 39b6315db82e..c7968139486f 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -36,7 +36,7 @@ extern resource_size_t isa_mem_base;
#ifdef CONFIG_MMU
#define page_to_bus(page) (page_to_phys(page))
-extern void iounmap(void __iomem *addr);
+extern void iounmap(volatile void __iomem *addr);
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
#define ioremap_nocache(addr, size) ioremap((addr), (size))
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 4c0599239915..7f525962cdfa 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -127,7 +127,7 @@ void __iomem *ioremap(phys_addr_t addr, unsigned long size)
}
EXPORT_SYMBOL(ioremap);
-void iounmap(void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
{
if ((__force void *)addr > high_memory &&
(unsigned long) addr < ioremap_bot)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 73fcf592ee91..9d3329811cc1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -143,6 +143,7 @@ config PPC
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -150,6 +151,7 @@ config PPC
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
@@ -180,8 +182,6 @@ config PPC
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
- select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
- select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_CBPF_JIT if !PPC64
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_DEBUG_KMEMLEAK
@@ -868,6 +868,21 @@ config SECCOMP
If unsure, say Y. Only embedded should say N here.
+config PPC_MEM_KEYS
+ prompt "PowerPC Memory Protection Keys"
+ def_bool y
+ depends on PPC_BOOK3S_64
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_PKEYS
+ help
+ Memory Protection Keys provides a mechanism for enforcing
+ page-based protections, but without requiring modification of the
+ page tables when an application changes protection domains.
+
+ For details, see Documentation/vm/protection-keys.txt
+
+ If unsure, say y.
+
endmenu
config ISA_DMA_API
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 657c33cd4eee..c45424c64e19 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -90,6 +90,10 @@ config MSI_BITMAP_SELFTEST
depends on DEBUG_KERNEL
default n
+config PPC_IRQ_SOFT_MASK_DEBUG
+ bool "Include extra checks for powerpc irq soft masking"
+ default n
+
config XMON
bool "Include xmon kernel debugger"
depends on DEBUG_KERNEL
@@ -368,7 +372,7 @@ config PPC_PTDUMP
config PPC_HTDUMP
def_bool y
- depends on PPC_PTDUMP && PPC_BOOK3S
+ depends on PPC_PTDUMP && PPC_BOOK3S_64
config PPC_FAST_ENDIAN_SWITCH
bool "Deprecated fast endian-switch syscall"
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 1381693a4a51..ccd2556bdb53 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -63,6 +63,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 08782f55b89f..ef6549e57157 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -108,10 +108,10 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
elf_util.c $(zlib-y) devtree.c stdlib.c \
- oflib.c ofconsole.c cuboot.c cpm-serial.c \
- uartlite.c opal.c
+ oflib.c ofconsole.c cuboot.c
+
src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c
-src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S
+src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c
ifndef CONFIG_PPC64_BOOT_WRAPPER
src-wlib-y += crtsavres.S
endif
@@ -120,6 +120,8 @@ src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c
+src-wlib-$(CONFIG_CPM) += cpm-serial.c
src-plat-y := of.c epapr.c
src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
index bf81b8f9704c..187ce458d03a 100644
--- a/arch/powerpc/boot/dts/a3m071.dts
+++ b/arch/powerpc/boot/dts/a3m071.dts
@@ -105,24 +105,24 @@
reg = <0 0x0 0x02000000>;
compatible = "cfi-flash";
bank-width = <2>;
- partition@0x0 {
+ partition@0 {
label = "u-boot";
reg = <0x00000000 0x00040000>;
read-only;
};
- partition@0x00040000 {
+ partition@40000 {
label = "env";
reg = <0x00040000 0x00020000>;
};
- partition@0x00060000 {
+ partition@60000 {
label = "dtb";
reg = <0x00060000 0x00020000>;
};
- partition@0x00080000 {
+ partition@80000 {
label = "kernel";
reg = <0x00080000 0x00500000>;
};
- partition@0x00580000 {
+ partition@580000 {
label = "root";
reg = <0x00580000 0x00A80000>;
};
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index e61d5dc598c1..746779202a12 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -216,7 +216,7 @@
interrupts = <39 2>;
};
- IIC0: i2c@00000000 {
+ IIC0: i2c@0 {
compatible = "ibm,iic-476gtr", "ibm,iic";
reg = <0x0 0x00000020>;
interrupt-parent = <&MPIC>;
@@ -229,7 +229,7 @@
};
};
- IIC1: i2c@00000100 {
+ IIC1: i2c@100 {
compatible = "ibm,iic-476gtr", "ibm,iic";
reg = <0x100 0x00000020>;
interrupt-parent = <&MPIC>;
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
index 1e32903cb0a8..27f169e3ade9 100644
--- a/arch/powerpc/boot/dts/c2k.dts
+++ b/arch/powerpc/boot/dts/c2k.dts
@@ -276,14 +276,14 @@
>;
};
- cpu-error@0070 {
+ cpu-error@70 {
compatible = "marvell,mv64360-cpu-error";
reg = <0x0070 0x10 0x0128 0x28>;
interrupts = <3>;
interrupt-parent = <&PIC>;
};
- sram-ctrl@0380 {
+ sram-ctrl@380 {
compatible = "marvell,mv64360-sram-ctrl";
reg = <0x0380 0x80>;
interrupts = <13>;
@@ -311,7 +311,7 @@
interrupt-parent = <&PIC>;
};
/* Devices attached to the device controller */
- devicebus@045c {
+ devicebus@45c {
#address-cells = <2>;
#size-cells = <1>;
compatible = "marvell,mv64306-devctrl";
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index 4191e1850ea1..f2ad5815f08d 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -108,7 +108,7 @@
reg = <0x50000000 0x4>;
};
- IIC0: i2c@00000000 {
+ IIC0: i2c@0 {
compatible = "ibm,iic-currituck", "ibm,iic";
reg = <0x0 0x00000014>;
interrupt-parent = <&MPIC>;
diff --git a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
index 01706a339603..bc3e8039bdc7 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
@@ -126,7 +126,7 @@
par_io@e0100 {
num-ports = <7>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x4 0xa 0x1 0x0 0x2 0x0 /* TxD0 */
@@ -154,7 +154,7 @@
0x1 0x1f 0x2 0x0 0x3 0x0>; /* GTX125 */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x5 0xa 0x1 0x0 0x2 0x0 /* TxD0 */
@@ -228,22 +228,22 @@
/* These are the same PHYs as on
* gianfar's MDIO bus */
- qe_phy0: ethernet-phy@07 {
+ qe_phy0: ethernet-phy@7 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x7>;
};
- qe_phy1: ethernet-phy@01 {
+ qe_phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x1>;
};
- qe_phy2: ethernet-phy@02 {
+ qe_phy2: ethernet-phy@2 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x2>;
};
- qe_phy3: ethernet-phy@03 {
+ qe_phy3: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x3>;
diff --git a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
index 76b2bd6f7742..d8367ceddea6 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
@@ -141,7 +141,7 @@
gpio-controller;
};
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -161,7 +161,7 @@
0x2 0x14 0x1 0x0 0x2 0x0>; /* ENET1_GTXCLK */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -181,7 +181,7 @@
0x2 0x2 0x1 0x0 0x2 0x0>; /* ENET2_GTXCLK */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -201,7 +201,7 @@
0x2 0x19 0x1 0x0 0x2 0x0>; /* ENET3_GTXCLK */
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -272,30 +272,30 @@
reg = <0x2120 0x18>;
compatible = "fsl,ucc-mdio";
- qe_phy0: ethernet-phy@07 {
+ qe_phy0: ethernet-phy@7 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x7>;
};
- qe_phy1: ethernet-phy@01 {
+ qe_phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x1>;
};
- qe_phy2: ethernet-phy@02 {
+ qe_phy2: ethernet-phy@2 {
interrupt-parent = <&mpic>;
interrupts = <3 1 0 0>;
reg = <0x2>;
};
- qe_phy3: ethernet-phy@03 {
+ qe_phy3: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <4 1 0 0>;
reg = <0x3>;
};
- qe_phy5: ethernet-phy@04 {
+ qe_phy5: ethernet-phy@4 {
reg = <0x04>;
};
- qe_phy7: ethernet-phy@06 {
+ qe_phy7: ethernet-phy@6 {
reg = <0x6>;
};
tbi1: tbi-phy@11 {
diff --git a/arch/powerpc/boot/dts/fsl/p1021mds.dts b/arch/powerpc/boot/dts/fsl/p1021mds.dts
index 291454c75dda..1047802f4d2a 100644
--- a/arch/powerpc/boot/dts/fsl/p1021mds.dts
+++ b/arch/powerpc/boot/dts/fsl/p1021mds.dts
@@ -202,7 +202,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -225,7 +225,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -296,7 +296,7 @@
interrupts = <4 1 0 0>;
reg = <0x0>;
};
- qe_phy1: ethernet-phy@03 {
+ qe_phy1: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <5 1 0 0>;
reg = <0x3>;
diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
index d44bb12debb0..0a5434a631c3 100644
--- a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
@@ -245,7 +245,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -268,7 +268,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -283,7 +283,7 @@
0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/
@@ -293,7 +293,7 @@
0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/
diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
index b15acbaea34b..ea33b57f8774 100644
--- a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
@@ -106,7 +106,7 @@
interrupts = <4 1 0 0>;
reg = <0x6>;
};
- qe_phy1: ethernet-phy@03 {
+ qe_phy1: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <5 1 0 0>;
reg = <0x3>;
diff --git a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
index 08816fb474f5..ab75b8f29ae2 100644
--- a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
@@ -172,7 +172,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -195,7 +195,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -210,7 +210,7 @@
0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/
@@ -220,7 +220,7 @@
0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
index 621f2c6ee6ad..65ff34c49025 100644
--- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -61,7 +61,7 @@
};
mdio@fc000 {
- phy_sgmii_2: ethernet-phy@03 {
+ phy_sgmii_2: ethernet-phy@3 {
reg = <0x03>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
index fcd2aeb5b8ac..4fa15f48a4c3 100644
--- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
@@ -77,23 +77,23 @@
};
mdio0: mdio@fc000 {
- phy_sgmii_0: ethernet-phy@02 {
+ phy_sgmii_0: ethernet-phy@2 {
reg = <0x02>;
};
- phy_sgmii_1: ethernet-phy@03 {
+ phy_sgmii_1: ethernet-phy@3 {
reg = <0x03>;
};
- phy_sgmii_2: ethernet-phy@01 {
+ phy_sgmii_2: ethernet-phy@1 {
reg = <0x01>;
};
- phy_rgmii_0: ethernet-phy@04 {
+ phy_rgmii_0: ethernet-phy@4 {
reg = <0x04>;
};
- phy_rgmii_1: ethernet-phy@05 {
+ phy_rgmii_1: ethernet-phy@5 {
reg = <0x05>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
index 2c138627b1b4..3ebb712224cb 100644
--- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
@@ -59,7 +59,7 @@
};
mdio@fc000 {
- phy_sgmii_2: ethernet-phy@03 {
+ phy_sgmii_2: ethernet-phy@3 {
reg = <0x03>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
index 5fdddbd2a62b..099a598c74c0 100644
--- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
@@ -148,15 +148,15 @@
};
mdio0: mdio@fc000 {
- phy_sgmii_2: ethernet-phy@03 {
+ phy_sgmii_2: ethernet-phy@3 {
reg = <0x03>;
};
- phy_rgmii_0: ethernet-phy@01 {
+ phy_rgmii_0: ethernet-phy@1 {
reg = <0x01>;
};
- phy_rgmii_1: ethernet-phy@02 {
+ phy_rgmii_1: ethernet-phy@2 {
reg = <0x02>;
};
};
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index f10a64aeb83b..6560283c5aec 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -583,21 +583,21 @@
};
};
- OHCI1: ohci@02040000 {
+ OHCI1: ohci@2040000 {
compatible = "ohci-le";
reg = <0x02040000 0xa0>;
interrupt-parent = <&UIC1_3>;
interrupts = <28 0x8 29 0x8>;
};
- OHCI2: ohci@02080000 {
+ OHCI2: ohci@2080000 {
compatible = "ohci-le";
reg = <0x02080000 0xa0>;
interrupt-parent = <&UIC1_3>;
interrupts = <30 0x8 31 0x8>;
};
- EHCI: ehci@02000000 {
+ EHCI: ehci@2000000 {
compatible = "usb-ehci";
reg = <0x02000000 0xa4>;
interrupt-parent = <&UIC1_3>;
diff --git a/arch/powerpc/boot/dts/gamecube.dts b/arch/powerpc/boot/dts/gamecube.dts
index ef3be0e58b02..58d06c9ee08b 100644
--- a/arch/powerpc/boot/dts/gamecube.dts
+++ b/arch/powerpc/boot/dts/gamecube.dts
@@ -54,13 +54,13 @@
ranges = <0x0c000000 0x0c000000 0x00010000>;
interrupt-parent = <&PIC>;
- video@0c002000 {
+ video@c002000 {
compatible = "nintendo,flipper-vi";
reg = <0x0c002000 0x100>;
interrupts = <8>;
};
- processor-interface@0c003000 {
+ processor-interface@c003000 {
compatible = "nintendo,flipper-pi";
reg = <0x0c003000 0x100>;
@@ -71,7 +71,7 @@
};
};
- dsp@0c005000 {
+ dsp@c005000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "nintendo,flipper-dsp";
@@ -84,26 +84,26 @@
};
};
- disk@0c006000 {
+ disk@c006000 {
compatible = "nintendo,flipper-di";
reg = <0x0c006000 0x40>;
interrupts = <2>;
};
- audio@0c006c00 {
+ audio@c006c00 {
compatible = "nintendo,flipper-ai";
reg = <0x0c006c00 0x20>;
interrupts = <6>;
};
- gamepad-controller@0c006400 {
+ gamepad-controller@c006400 {
compatible = "nintendo,flipper-si";
reg = <0x0c006400 0x100>;
interrupts = <3>;
};
/* External Interface bus */
- exi@0c006800 {
+ exi@c006800 {
compatible = "nintendo,flipper-exi";
reg = <0x0c006800 0x40>;
virtual-reg = <0x0c006800>;
diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts
index 2b256694eca6..cb16dad43c92 100644
--- a/arch/powerpc/boot/dts/haleakala.dts
+++ b/arch/powerpc/boot/dts/haleakala.dts
@@ -237,7 +237,7 @@
};
};
- PCIE0: pciex@0a0000000 {
+ PCIE0: pciex@a0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
index 5ba7f01e2a29..2a3413221cc1 100644
--- a/arch/powerpc/boot/dts/kilauea.dts
+++ b/arch/powerpc/boot/dts/kilauea.dts
@@ -322,7 +322,7 @@
};
};
- PCIE0: pciex@0a0000000 {
+ PCIE0: pciex@a0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -363,7 +363,7 @@
0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@0c0000000 {
+ PCIE1: pciex@c0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts
index 983aee185793..9fa33d9ba966 100644
--- a/arch/powerpc/boot/dts/kmeter1.dts
+++ b/arch/powerpc/boot/dts/kmeter1.dts
@@ -434,27 +434,27 @@
compatible = "fsl,ucc-mdio";
/* Piggy2 (UCC4, MDIO 0x00, RMII) */
- phy_piggy2: ethernet-phy@00 {
+ phy_piggy2: ethernet-phy@0 {
reg = <0x0>;
};
/* Eth-1 (UCC5, MDIO 0x08, RMII) */
- phy_eth1: ethernet-phy@08 {
+ phy_eth1: ethernet-phy@8 {
reg = <0x08>;
};
/* Eth-2 (UCC6, MDIO 0x09, RMII) */
- phy_eth2: ethernet-phy@09 {
+ phy_eth2: ethernet-phy@9 {
reg = <0x09>;
};
/* Eth-3 (UCC7, MDIO 0x0a, RMII) */
- phy_eth3: ethernet-phy@0a {
+ phy_eth3: ethernet-phy@a {
reg = <0x0a>;
};
/* Eth-4 (UCC8, MDIO 0x0b, RMII) */
- phy_eth4: ethernet-phy@0b {
+ phy_eth4: ethernet-phy@b {
reg = <0x0b>;
};
diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts
index 63d48b632c84..bf8fe1629392 100644
--- a/arch/powerpc/boot/dts/makalu.dts
+++ b/arch/powerpc/boot/dts/makalu.dts
@@ -268,7 +268,7 @@
};
};
- PCIE0: pciex@0a0000000 {
+ PCIE0: pciex@a0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -309,7 +309,7 @@
0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@0c0000000 {
+ PCIE1: pciex@c0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts
index 0793cdf0d46e..49c7d657118a 100644
--- a/arch/powerpc/boot/dts/mpc832x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc832x_mds.dts
@@ -186,7 +186,7 @@
device_type = "par_io";
num-ports = <7>;
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
3 4 3 0 2 0 /* MDIO */
@@ -208,7 +208,7 @@
1 12 1 0 1 0 /* TX_EN */
1 13 2 0 1 0>; /* CRS */
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
3 31 2 0 1 0 /* RX_CLK (CLK7) */
@@ -228,7 +228,7 @@
1 30 1 0 1 0 /* TX_EN */
1 31 2 0 1 0>; /* CRS */
};
- pio5: ucc_pin@05 {
+ pio5: ucc_pin@5 {
pio-map = <
/*
* open has
@@ -352,12 +352,12 @@
reg = <0x2320 0x18>;
compatible = "fsl,ucc-mdio";
- phy3: ethernet-phy@03 {
+ phy3: ethernet-phy@3 {
interrupt-parent = <&ipic>;
interrupts = <17 0x8>;
reg = <0x3>;
};
- phy4: ethernet-phy@04 {
+ phy4: ethernet-phy@4 {
interrupt-parent = <&ipic>;
interrupts = <18 0x8>;
reg = <0x4>;
diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts
index 91df1eb16667..647cae14c16d 100644
--- a/arch/powerpc/boot/dts/mpc832x_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
@@ -175,7 +175,7 @@
gpio-controller;
};
- ucc2pio:ucc_pin@02 {
+ ucc2pio:ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
3 4 3 0 2 0 /* MDIO */
@@ -197,7 +197,7 @@
0 30 1 0 1 0 /* TX_EN */
0 31 2 0 1 0>; /* CRS */
};
- ucc3pio:ucc_pin@03 {
+ ucc3pio:ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0 13 2 0 1 0 /* RX_CLK (CLK9) */
@@ -310,12 +310,12 @@
reg = <0x3120 0x18>;
compatible = "fsl,ucc-mdio";
- phy00:ethernet-phy@00 {
+ phy00:ethernet-phy@0 {
interrupt-parent = <&ipic>;
interrupts = <0>;
reg = <0x0>;
};
- phy04:ethernet-phy@04 {
+ phy04:ethernet-phy@4 {
interrupt-parent = <&ipic>;
interrupts = <0>;
reg = <0x4>;
diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts
index ecb6ccd3a6aa..539fd9f72eda 100644
--- a/arch/powerpc/boot/dts/mpc836x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc836x_mds.dts
@@ -228,7 +228,7 @@
gpio-controller;
};
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0 3 1 0 1 0 /* TxD0 */
@@ -255,7 +255,7 @@
2 9 1 0 3 0 /* GTX_CLK - CLK10 */
2 8 2 0 1 0>; /* GTX125 - CLK9 */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0 17 1 0 1 0 /* TxD0 */
@@ -393,12 +393,12 @@
reg = <0x2120 0x18>;
compatible = "fsl,ucc-mdio";
- phy0: ethernet-phy@00 {
+ phy0: ethernet-phy@0 {
interrupt-parent = <&ipic>;
interrupts = <17 0x8>;
reg = <0x0>;
};
- phy1: ethernet-phy@01 {
+ phy1: ethernet-phy@1 {
interrupt-parent = <&ipic>;
interrupts = <18 0x8>;
reg = <0x1>;
diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts
index 0b38a0defd2c..8967a56adad4 100644
--- a/arch/powerpc/boot/dts/sbc8548-altflash.dts
+++ b/arch/powerpc/boot/dts/sbc8548-altflash.dts
@@ -40,12 +40,12 @@
compatible = "intel,JS28F128", "cfi-flash";
bank-width = <4>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* FC000000 -> FFEFFFFF */
reg = <0x00000000 0x03f00000>;
};
- partition@0x03f00000 {
+ partition@3f00000 {
label = "bootloader";
/* FFF00000 -> FFFFFFFF */
reg = <0x03f00000 0x00100000>;
@@ -95,12 +95,12 @@
reg = <0x6 0x0 0x800000>;
bank-width = <1>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* EF800000 -> EFF9FFFF */
reg = <0x00000000 0x007a0000>;
};
- partition@0x7a0000 {
+ partition@7a0000 {
label = "bootloader";
/* EFFA0000 -> EFFFFFFF */
reg = <0x007a0000 0x00060000>;
diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts
index 1df2a0955668..9bdb828a504e 100644
--- a/arch/powerpc/boot/dts/sbc8548.dts
+++ b/arch/powerpc/boot/dts/sbc8548.dts
@@ -38,12 +38,12 @@
reg = <0x0 0x0 0x800000>;
bank-width = <1>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* FF800000 -> FFF9FFFF */
reg = <0x00000000 0x007a0000>;
};
- partition@0x7a0000 {
+ partition@7a0000 {
label = "bootloader";
/* FFFA0000 -> FFFFFFFF */
reg = <0x007a0000 0x00060000>;
@@ -92,12 +92,12 @@
compatible = "intel,JS28F128", "cfi-flash";
bank-width = <4>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* EC000000 -> EFEFFFFF */
reg = <0x00000000 0x03f00000>;
};
- partition@0x03f00000 {
+ partition@3f00000 {
label = "bootloader";
/* EFF00000 -> EFFFFFFF */
reg = <0x03f00000 0x00100000>;
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index 77528c9a8dbd..17a5babb098d 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -65,14 +65,14 @@
0x0d800000 0x0d800000 0x00800000>;
interrupt-parent = <&PIC0>;
- video@0c002000 {
+ video@c002000 {
compatible = "nintendo,hollywood-vi",
"nintendo,flipper-vi";
reg = <0x0c002000 0x100>;
interrupts = <8>;
};
- processor-interface@0c003000 {
+ processor-interface@c003000 {
compatible = "nintendo,hollywood-pi",
"nintendo,flipper-pi";
reg = <0x0c003000 0x100>;
@@ -84,7 +84,7 @@
};
};
- dsp@0c005000 {
+ dsp@c005000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "nintendo,hollywood-dsp",
@@ -93,14 +93,14 @@
interrupts = <6>;
};
- gamepad-controller@0d006400 {
+ gamepad-controller@d006400 {
compatible = "nintendo,hollywood-si",
"nintendo,flipper-si";
reg = <0x0d006400 0x100>;
interrupts = <3>;
};
- audio@0c006c00 {
+ audio@c006c00 {
compatible = "nintendo,hollywood-ai",
"nintendo,flipper-ai";
reg = <0x0d006c00 0x20>;
@@ -108,7 +108,7 @@
};
/* External Interface bus */
- exi@0d006800 {
+ exi@d006800 {
compatible = "nintendo,hollywood-exi",
"nintendo,flipper-exi";
reg = <0x0d006800 0x40>;
@@ -116,7 +116,7 @@
interrupts = <4>;
};
- usb@0d040000 {
+ usb@d040000 {
compatible = "nintendo,hollywood-usb-ehci",
"usb-ehci";
reg = <0x0d040000 0x100>;
@@ -124,7 +124,7 @@
interrupt-parent = <&PIC1>;
};
- usb@0d050000 {
+ usb@d050000 {
compatible = "nintendo,hollywood-usb-ohci",
"usb-ohci";
reg = <0x0d050000 0x100>;
@@ -132,7 +132,7 @@
interrupt-parent = <&PIC1>;
};
- usb@0d060000 {
+ usb@d060000 {
compatible = "nintendo,hollywood-usb-ohci",
"usb-ohci";
reg = <0x0d060000 0x100>;
@@ -140,7 +140,7 @@
interrupt-parent = <&PIC1>;
};
- sd@0d070000 {
+ sd@d070000 {
compatible = "nintendo,hollywood-sdhci",
"sdhci";
reg = <0x0d070000 0x200>;
@@ -148,7 +148,7 @@
interrupt-parent = <&PIC1>;
};
- sdio@0d080000 {
+ sdio@d080000 {
compatible = "nintendo,hollywood-sdhci",
"sdhci";
reg = <0x0d080000 0x200>;
@@ -156,14 +156,14 @@
interrupt-parent = <&PIC1>;
};
- ipc@0d000000 {
+ ipc@d000000 {
compatible = "nintendo,hollywood-ipc";
reg = <0x0d000000 0x10>;
interrupts = <30>;
interrupt-parent = <&PIC1>;
};
- PIC1: pic1@0d800030 {
+ PIC1: pic1@d800030 {
#interrupt-cells = <1>;
compatible = "nintendo,hollywood-pic";
reg = <0x0d800030 0x10>;
@@ -171,7 +171,7 @@
interrupts = <14>;
};
- GPIO: gpio@0d8000c0 {
+ GPIO: gpio@d8000c0 {
#gpio-cells = <2>;
compatible = "nintendo,hollywood-gpio";
reg = <0x0d8000c0 0x40>;
@@ -203,12 +203,12 @@
*/
};
- control@0d800100 {
+ control@d800100 {
compatible = "nintendo,hollywood-control";
reg = <0x0d800100 0x300>;
};
- disk@0d806000 {
+ disk@d806000 {
compatible = "nintendo,hollywood-di";
reg = <0x0d806000 0x40>;
interrupts = <2>;
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index 7b5c02b1afd0..88955095ec07 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -124,20 +124,26 @@ int serial_console_init(void)
else if (dt_is_compatible(devp, "marvell,mv64360-mpsc"))
rc = mpsc_console_init(devp, &serial_cd);
#endif
+#ifdef CONFIG_CPM
else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
rc = cpm_console_init(devp, &serial_cd);
+#endif
#ifdef CONFIG_PPC_MPC52XX
else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
rc = mpc5200_psc_console_init(devp, &serial_cd);
#endif
+#ifdef CONFIG_XILINX_VIRTEX
else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") ||
dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a"))
rc = uartlite_console_init(devp, &serial_cd);
+#endif
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
else if (dt_is_compatible(devp, "ibm,opal-console-raw"))
rc = opal_console_init(devp, &serial_cd);
+#endif
/* Add other serial console driver calls here */
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index f1f176c29fa3..5320735395e7 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -13,7 +13,6 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_MPC86XADS=y
CONFIG_8xx_COPYBACK=y
-CONFIG_8xx_CPU6=y
CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
CONFIG_MATH_EMULATION=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 73dab7a37386..9e92aa6a52ba 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -96,6 +96,7 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=y
CONFIG_IDE=y
CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_GENERIC=y
@@ -112,6 +113,7 @@ CONFIG_SCSI_CXGB3_ISCSI=m
CONFIG_SCSI_CXGB4_ISCSI=m
CONFIG_SCSI_BNX2_ISCSI=m
CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=y
CONFIG_SCSI_MPT2SAS=m
CONFIG_SCSI_SYM53C8XX_2=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 016579ef16d3..30a155c0a6b0 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -311,6 +311,29 @@ static inline int pte_present(pte_t pte)
return pte_val(pte) & _PAGE_PRESENT;
}
+/*
+ * We only find page table entry in the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ unsigned long pteval = pte_val(pte);
+ /*
+ * A read-only access is controlled by _PAGE_USER bit.
+ * We have _PAGE_READ set for WRITE and EXECUTE
+ */
+ unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
+
+ if (write)
+ need_pte_bits |= _PAGE_WRITE;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return false;
+
+ return true;
+}
+
/* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 2d9df40446f6..949d691094a4 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -17,6 +17,12 @@
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE)
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
+#define H_PAGE_F_GIX_SHIFT 53
+#define H_PAGE_F_SECOND _RPAGE_RPN44 /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41)
+#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE _RPAGE_RSV2 /* software: PTE & hash are busy */
+
/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
H_PAGE_F_SECOND | H_PAGE_F_GIX)
@@ -49,6 +55,20 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
}
#endif
+/*
+ * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just
+ * a matter of returning the PTE bits that need to be modified. On 64K PTE,
+ * things are a little more involved and hence needs many more parameters to
+ * accomplish the same. However we want to abstract this out from the caller by
+ * keeping the prototype consistent across the two formats.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+ unsigned int subpg_index, unsigned long hidx)
+{
+ return (hidx << H_PAGE_F_GIX_SHIFT) &
+ (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline char *get_hpte_slot_array(pmd_t *pmdp)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index cb46d1034f33..338b7da468ce 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -13,21 +13,17 @@
*/
#define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */
#define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */
+#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
+
/*
* We need to differentiate between explicit huge page and THP huge
* page, since THP huge page also need to track real subpage details
*/
#define H_PAGE_THP_HUGE H_PAGE_4K_PFN
-/*
- * Used to track subpage group valid if H_PAGE_COMBO is set
- * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND
- */
-#define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND)
-
/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \
- H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO)
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
/*
* we support 16 fragments per PTE page of 64K size.
*/
@@ -55,24 +51,57 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
unsigned long *hidxp;
rpte.pte = pte;
- rpte.hidx = 0;
- if (pte_val(pte) & H_PAGE_COMBO) {
- /*
- * Make sure we order the hidx load against the H_PAGE_COMBO
- * check. The store side ordering is done in __hash_page_4K
- */
- smp_rmb();
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
- rpte.hidx = *hidxp;
- }
+
+ /*
+ * Ensure that we do not read the hidx before we read the PTE. Because
+ * the writer side is expected to finish writing the hidx first followed
+ * by the PTE, by using smp_wmb(). pte_set_hash_slot() ensures that.
+ */
+ smp_rmb();
+
+ hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ rpte.hidx = *hidxp;
return rpte;
}
+/*
+ * shift the hidx representation by one-modulo-0xf; i.e hidx 0 is respresented
+ * as 1, 1 as 2,... , and 0xf as 0. This convention lets us represent a
+ * invalid hidx 0xf with a 0x0 bit value. PTEs are anyway zero'd when
+ * allocated. We dont have to zero them gain; thus save on the initialization.
+ */
+#define HIDX_UNSHIFT_BY_ONE(x) ((x + 0xfUL) & 0xfUL) /* shift backward by one */
+#define HIDX_SHIFT_BY_ONE(x) ((x + 0x1UL) & 0xfUL) /* shift forward by one */
+#define HIDX_BITS(x, index) (x << (index << 2))
+#define BITS_TO_HIDX(x, index) ((x >> (index << 2)) & 0xfUL)
+#define INVALID_RPTE_HIDX 0x0UL
+
static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
{
- if ((pte_val(rpte.pte) & H_PAGE_COMBO))
- return (rpte.hidx >> (index<<2)) & 0xf;
- return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf;
+ return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index));
+}
+
+/*
+ * Commit the hidx and return PTE bits that needs to be modified. The caller is
+ * expected to modify the PTE bits accordingly and commit the PTE to memory.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+ unsigned int subpg_index, unsigned long hidx)
+{
+ unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+ rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
+ *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
+
+ /*
+ * Anyone reading PTE must ensure hidx bits are read after reading the
+ * PTE by using the read-side barrier smp_rmb(). __real_pte() can be
+ * used for that.
+ */
+ smp_wmb();
+
+ /* No PTE bits to be modified, return 0x0UL */
+ return 0x0UL;
}
#define __rpte_to_pte(r) ((r).pte)
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index ecb1239d74f4..0920eff731b3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -9,11 +9,6 @@
*
*/
#define H_PTE_NONE_MASK _PAGE_HPTEFLAGS
-#define H_PAGE_F_GIX_SHIFT 56
-#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */
-#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */
-#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44)
-#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
@@ -167,6 +162,9 @@ static inline int hash__pte_none(pte_t pte)
return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0;
}
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+ int ssize, real_pte_t rpte, unsigned int subpg_index);
+
/* This low level function performs the actual PTE insertion
* Setting the PTE depends on the MMU type and other factors. It's
* an horrible mess that I'm not going to try to clean up now but
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e91e115a816f..50ed64fba4ae 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -90,6 +90,8 @@
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
+#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000)
+#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000)
#define HPTE_R_RPN_SHIFT 12
#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
@@ -104,6 +106,9 @@
#define HPTE_R_C ASM_CONST(0x0000000000000080)
#define HPTE_R_R ASM_CONST(0x0000000000000100)
#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800)
+#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400)
+#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200)
#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index c9448e19847a..0abeb0e2d616 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -108,6 +108,16 @@ typedef struct {
#ifdef CONFIG_SPAPR_TCE_IOMMU
struct list_head iommu_group_mem_list;
#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * Each bit represents one protection key.
+ * bit set -> key allocated
+ * bit unset -> key available for allocation
+ */
+ u32 pkey_allocation_map;
+ s16 execute_only_pkey; /* key holding execute-only protection */
+#endif
} mm_context_t;
/*
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 6ca1208cedcb..51017726d495 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -14,8 +14,9 @@
*/
#define _PAGE_BIT_SWAP_TYPE 0
+#define _PAGE_NA 0
#define _PAGE_RO 0
-#define _PAGE_SHARED 0
+#define _PAGE_USER 0
#define _PAGE_EXEC 0x00001 /* execute permission */
#define _PAGE_WRITE 0x00002 /* write access allowed */
@@ -39,6 +40,7 @@
#define _RPAGE_RSV2 0x0800000000000000UL
#define _RPAGE_RSV3 0x0400000000000000UL
#define _RPAGE_RSV4 0x0200000000000000UL
+#define _RPAGE_RSV5 0x00040UL
#define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */
#define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */
@@ -58,6 +60,25 @@
/* Max physical address bit as per radix table */
#define _RPAGE_PA_MAX 57
+#ifdef CONFIG_PPC_MEM_KEYS
+#ifdef CONFIG_PPC_64K_PAGES
+#define H_PTE_PKEY_BIT0 _RPAGE_RSV1
+#define H_PTE_PKEY_BIT1 _RPAGE_RSV2
+#else /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */
+#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */
+#endif /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
+#else /* CONFIG_PPC_MEM_KEYS */
+#define H_PTE_PKEY_BIT0 0
+#define H_PTE_PKEY_BIT1 0
+#define H_PTE_PKEY_BIT2 0
+#define H_PTE_PKEY_BIT3 0
+#define H_PTE_PKEY_BIT4 0
+#endif /* CONFIG_PPC_MEM_KEYS */
+
/*
* Max physical address bit we will use for now.
*
@@ -121,13 +142,16 @@
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
_PAGE_SOFT_DIRTY)
+
+#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
+ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
/*
* Mask of bits returned by pte_pgprot()
*/
#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
_PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | H_PTE_PKEY)
/*
* We define 2 sets of base prot bits, one for basic pages (ie,
* cacheable kernel and user pages) and one for non cacheable
@@ -546,6 +570,40 @@ static inline int pte_present(pte_t pte)
{
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
}
+
+#ifdef CONFIG_PPC_MEM_KEYS
+extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
+#else
+static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+ return true;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ unsigned long pteval = pte_val(pte);
+ /* Also check for pte_user */
+ unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
+ /*
+ * _PAGE_READ is needed for any access and will be
+ * cleared for PROT_NONE
+ */
+ unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ;
+
+ if (write)
+ need_pte_bits |= _PAGE_WRITE;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return false;
+
+ if ((pteval & clear_pte_bits) == clear_pte_bits)
+ return false;
+
+ return arch_pte_access_permitted(pte_val(pte), write, 0);
+}
+
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -850,6 +908,11 @@ static inline int pud_bad(pud_t pud)
return hash__pud_bad(pud);
}
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+ return pte_access_permitted(pud_pte(pud), write);
+}
#define pgd_write(pgd) pte_write(pgd_pte(pgd))
static inline void pgd_set(pgd_t *pgdp, unsigned long val)
@@ -889,6 +952,12 @@ static inline int pgd_bad(pgd_t pgd)
return hash__pgd_bad(pgd);
}
+#define pgd_access_permitted pgd_access_permitted
+static inline bool pgd_access_permitted(pgd_t pgd, bool write)
+{
+ return pte_access_permitted(pgd_pte(pgd), write);
+}
+
extern struct page *pgd_page(pgd_t pgd);
/* Pointers in the page table tree are physical addresses */
@@ -1009,6 +1078,12 @@ static inline int pmd_protnone(pmd_t pmd)
#define __pmd_write(pmd) __pte_write(pmd_pte(pmd))
#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd))
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+ return pte_access_permitted(pmd_pte(pmd), write);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 849ecaae9e79..64d02a704bcb 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -51,6 +51,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
#define arch_flush_lazy_mmu_mode() do {} while (0)
+extern void hash__tlbiel_all(unsigned int action);
extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
int ssize, unsigned long flags);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 6a9e68003387..8eea90f80e45 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -11,6 +11,12 @@ static inline int mmu_get_ap(int psize)
return mmu_psize_defs[psize].ap;
}
+#ifdef CONFIG_PPC_RADIX_MMU
+extern void radix__tlbiel_all(unsigned int action);
+#else
+static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
+#endif
+
extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
@@ -47,4 +53,5 @@ extern void radix__flush_tlb_lpid(unsigned long lpid);
extern void radix__flush_tlb_all(void);
extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
unsigned long address);
+
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 58b576f654b3..0cac17253513 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -8,6 +8,44 @@
#include <asm/book3s/64/tlbflush-hash.h>
#include <asm/book3s/64/tlbflush-radix.h>
+/* TLB flush actions. Used as argument to tlbiel_all() */
+enum {
+ TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
+ TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
+};
+
+#ifdef CONFIG_PPC_NATIVE
+static inline void tlbiel_all(void)
+{
+ /*
+ * This is used for host machine check and bootup.
+ *
+ * This uses early_radix_enabled and implementations use
+ * early_cpu_has_feature etc because that works early in boot
+ * and this is the machine check path which is not performance
+ * critical.
+ */
+ if (early_radix_enabled())
+ radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+ else
+ hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+}
+#else
+static inline void tlbiel_all(void) { BUG(); };
+#endif
+
+static inline void tlbiel_all_lpid(bool radix)
+{
+ /*
+ * This is used for guest machine check.
+ */
+ if (radix)
+ radix__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+ else
+ hash__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+}
+
+
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 3c04249bcf39..fd06dbe7d7d3 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -133,9 +133,11 @@ struct pt_regs;
extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
extern void _exception(int, struct pt_regs *, int, unsigned long);
+extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int);
extern void die(const char *, struct pt_regs *, long);
extern bool die_will_crash(void);
-
+extern void panic_flush_kmsg_start(void);
+extern void panic_flush_kmsg_end(void);
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index abef812de7f8..812535f40124 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -31,8 +31,10 @@ unsigned int create_cond_branch(const unsigned int *addr,
unsigned long target, int flags);
int patch_branch(unsigned int *addr, unsigned long target, int flags);
int patch_instruction(unsigned int *addr, unsigned int instr);
+int raw_patch_instruction(unsigned int *addr, unsigned int instr);
int instr_is_relative_branch(unsigned int instr);
+int instr_is_relative_link_branch(unsigned int instr);
int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
unsigned long branch_target(const unsigned int *instr);
unsigned int translate_branch(const unsigned int *dest,
diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index b925df1b87d0..4c24ea8209bb 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -166,6 +166,6 @@ static inline int cpm_command(u32 command, u8 opcode)
}
#endif /* CONFIG_CPM */
-int cpm2_gpiochip_add32(struct device_node *np);
+int cpm2_gpiochip_add32(struct device *dev);
#endif
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
index 3db821876d48..a116fe931789 100644
--- a/arch/powerpc/include/asm/cpm1.h
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -605,5 +605,7 @@ enum cpm_clk {
};
int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int cpm1_gpiochip_add16(struct device *dev);
+int cpm1_gpiochip_add32(struct device *dev);
#endif /* __CPM1__ */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 0546663a98db..a2c5c95882cf 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -107,12 +107,6 @@ struct cpu_spec {
* called in real mode to handle SLB and TLB errors.
*/
long (*machine_check_early)(struct pt_regs *regs);
-
- /*
- * Processor specific routine to flush tlbs.
- */
- void (*flush_tlb)(unsigned int action);
-
};
extern struct cpu_spec *cur_cpu_spec;
@@ -133,12 +127,6 @@ extern void cpu_feature_keys_init(void);
static inline void cpu_feature_keys_init(void) { }
#endif
-/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
-enum {
- TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
- TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
-};
-
#endif /* __ASSEMBLY__ */
/* CPU kernel features */
@@ -207,7 +195,7 @@ enum {
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-/* Free LONG_ASM_CONST(0x0020000000000000) */
+#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000)
#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
@@ -454,7 +442,7 @@ enum {
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | \
- CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
+ CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -464,7 +452,7 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
@@ -476,7 +464,8 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
+ CPU_FTR_PKEY)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
new file mode 100644
index 000000000000..ce242b9ea8c6
--- /dev/null
+++ b/arch/powerpc/include/asm/drmem.h
@@ -0,0 +1,102 @@
+/*
+ * drmem.h: Power specific logical memory block representation
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_LMB_H
+#define _ASM_POWERPC_LMB_H
+
+struct drmem_lmb {
+ u64 base_addr;
+ u32 drc_index;
+ u32 aa_index;
+ u32 flags;
+};
+
+struct drmem_lmb_info {
+ struct drmem_lmb *lmbs;
+ int n_lmbs;
+ u32 lmb_size;
+};
+
+extern struct drmem_lmb_info *drmem_info;
+
+#define for_each_drmem_lmb_in_range(lmb, start, end) \
+ for ((lmb) = (start); (lmb) <= (end); (lmb)++)
+
+#define for_each_drmem_lmb(lmb) \
+ for_each_drmem_lmb_in_range((lmb), \
+ &drmem_info->lmbs[0], \
+ &drmem_info->lmbs[drmem_info->n_lmbs - 1])
+
+/*
+ * The of_drconf_cell_v1 struct defines the layout of the LMB data
+ * specified in the ibm,dynamic-memory device tree property.
+ * The property itself is a 32-bit value specifying the number of
+ * LMBs followed by an array of of_drconf_cell_v1 entries, one
+ * per LMB.
+ */
+struct of_drconf_cell_v1 {
+ __be64 base_addr;
+ __be32 drc_index;
+ __be32 reserved;
+ __be32 aa_index;
+ __be32 flags;
+};
+
+/*
+ * Version 2 of the ibm,dynamic-memory property is defined as a
+ * 32-bit value specifying the number of LMB sets followed by an
+ * array of of_drconf_cell_v2 entries, one per LMB set.
+ */
+struct of_drconf_cell_v2 {
+ u32 seq_lmbs;
+ u64 base_addr;
+ u32 drc_index;
+ u32 aa_index;
+ u32 flags;
+} __packed;
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+
+static inline u32 drmem_lmb_size(void)
+{
+ return drmem_info->lmb_size;
+}
+
+#define DRMEM_LMB_RESERVED 0x80000000
+
+static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
+{
+ lmb->flags |= DRMEM_LMB_RESERVED;
+}
+
+static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
+{
+ lmb->flags &= ~DRMEM_LMB_RESERVED;
+}
+
+static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
+{
+ return lmb->flags & DRMEM_LMB_RESERVED;
+}
+
+u64 drmem_lmb_memory_max(void);
+void __init walk_drmem_lmbs(struct device_node *dn,
+ void (*func)(struct drmem_lmb *, const __be32 **));
+int drmem_update_dt(void);
+
+#ifdef CONFIG_PPC_PSERIES
+void __init walk_drmem_lmbs_early(unsigned long node,
+ void (*func)(struct drmem_lmb *, const __be32 **));
+#endif
+
+#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 5161c37dd039..fd37cc101f4f 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -214,6 +214,7 @@ struct eeh_ops {
int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val);
int (*next_error)(struct eeh_pe **pe);
int (*restore_config)(struct pci_dn *pdn);
+ int (*notify_resume)(struct pci_dn *pdn);
};
extern int eeh_subsystem_flags;
@@ -297,6 +298,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option);
int eeh_pe_configure(struct eeh_pe *pe);
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
unsigned long addr, unsigned long mask);
+int eeh_restore_vf_config(struct pci_dn *pdn);
/**
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 7197b179c1b1..176dfb73d42c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -251,18 +251,40 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,area+EX_R10(r13); /* save r10 - r12 */ \
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-#define __EXCEPTION_PROLOG_1(area, extra, vec) \
+#define __EXCEPTION_PROLOG_1_PRE(area) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
SAVE_CTR(r10, area); \
- mfcr r9; \
- extra(vec); \
+ mfcr r9;
+
+#define __EXCEPTION_PROLOG_1_POST(area) \
std r11,area+EX_R11(r13); \
std r12,area+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,area+EX_R13(r13)
+
+/*
+ * This version of the EXCEPTION_PROLOG_1 will carry
+ * addition parameter called "bitmask" to support
+ * checking of the interrupt maskable level in the SOFTEN_TEST.
+ * Intended to be used in MASKABLE_EXCPETION_* macros.
+ */
+#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \
+ __EXCEPTION_PROLOG_1_PRE(area); \
+ extra(vec, bitmask); \
+ __EXCEPTION_PROLOG_1_POST(area);
+
+/*
+ * This version of the EXCEPTION_PROLOG_1 is intended
+ * to be used in STD_EXCEPTION* macros
+ */
+#define _EXCEPTION_PROLOG_1(area, extra, vec) \
+ __EXCEPTION_PROLOG_1_PRE(area); \
+ extra(vec); \
+ __EXCEPTION_PROLOG_1_POST(area);
+
#define EXCEPTION_PROLOG_1(area, extra, vec) \
- __EXCEPTION_PROLOG_1(area, extra, vec)
+ _EXCEPTION_PROLOG_1(area, extra, vec)
#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \
ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
@@ -485,7 +507,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mflr r9; /* Get LR, later save to stack */ \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
std r9,_LINK(r1); \
- lbz r10,PACASOFTIRQEN(r13); \
+ lbz r10,PACAIRQSOFTMASK(r13); \
mfspr r11,SPRN_XER; /* save XER in stackframe */ \
std r10,SOFTE(r1); \
std r11,_XER(r1); \
@@ -549,22 +571,23 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL
#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI
#define SOFTEN_VALUE_0xea0 PACA_IRQ_EE
+#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI
-#define __SOFTEN_TEST(h, vec) \
- lbz r10,PACASOFTIRQEN(r13); \
- cmpwi r10,0; \
+#define __SOFTEN_TEST(h, vec, bitmask) \
+ lbz r10,PACAIRQSOFTMASK(r13); \
+ andi. r10,r10,bitmask; \
li r10,SOFTEN_VALUE_##vec; \
- beq masked_##h##interrupt
+ bne masked_##h##interrupt
-#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec)
+#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask)
-#define SOFTEN_TEST_PR(vec) \
+#define SOFTEN_TEST_PR(vec, bitmask) \
KVMTEST(EXC_STD, vec); \
- _SOFTEN_TEST(EXC_STD, vec)
+ _SOFTEN_TEST(EXC_STD, vec, bitmask)
-#define SOFTEN_TEST_HV(vec) \
+#define SOFTEN_TEST_HV(vec, bitmask) \
KVMTEST(EXC_HV, vec); \
- _SOFTEN_TEST(EXC_HV, vec)
+ _SOFTEN_TEST(EXC_HV, vec, bitmask)
#define KVMTEST_PR(vec) \
KVMTEST(EXC_STD, vec)
@@ -572,53 +595,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define KVMTEST_HV(vec) \
KVMTEST(EXC_HV, vec)
-#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec)
-#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec)
+#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask)
+#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask)
-#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
EXCEPTION_PROLOG_PSERIES_1(label, h);
-#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
+ __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
-#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_TEST_PR)
+ EXC_STD, SOFTEN_TEST_PR, bitmask)
-#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \
+#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\
EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD)
-#define MASKABLE_EXCEPTION_HV(loc, vec, label) \
+#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV, bitmask)
-#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
+#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
-#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
+#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
-#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\
+ __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
-#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \
+#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_NOTEST_PR)
+ EXC_STD, SOFTEN_NOTEST_PR, bitmask)
+
+#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD);
-#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
+#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV, bitmask)
-#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
+#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 8645897472b1..511acfd7ab0d 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -51,6 +51,8 @@
#define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000)
#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
+#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000)
#ifndef __ASSEMBLY__
@@ -67,7 +69,8 @@ enum {
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
- FW_FEATURE_HPT_RESIZE,
+ FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+ FW_FEATURE_DRC_INFO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 456f9e7b8d83..5986d473722b 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -29,6 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x))
#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x))
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index fdcff76e9a25..7e0e93f24cb7 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -178,7 +178,7 @@ name:
* TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
* TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use)
* TRAMP_KVM - KVM handlers that get put into real, unrelocated
- * EXC_COMMON_* - virt, relocated common handlers
+ * EXC_COMMON - virt, relocated common handlers
*
* The EXC handlers are given a name, and branch to name_common, or the
* appropriate KVM or masking function. Vector handler verieties are as
@@ -211,7 +211,6 @@ name:
* EXC_COMMON_BEGIN/END - used to open-code the handler
* EXC_COMMON
* EXC_COMMON_ASYNC
- * EXC_COMMON_HV
*
* TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
* and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
@@ -269,14 +268,14 @@ name:
STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
EXC_VIRT_END(name, start, size);
-#define EXC_REAL_MASKABLE(name, start, size) \
+#define EXC_REAL_MASKABLE(name, start, size, bitmask) \
EXC_REAL_BEGIN(name, start, size); \
- MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \
+ MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\
EXC_REAL_END(name, start, size);
-#define EXC_VIRT_MASKABLE(name, start, size, realvec) \
+#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
EXC_VIRT_BEGIN(name, start, size); \
- MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\
EXC_VIRT_END(name, start, size);
#define EXC_REAL_HV(name, start, size) \
@@ -305,13 +304,13 @@ name:
#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
__EXC_REAL_OOL(name, start, size);
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec) \
+#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \
TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \
+ MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask); \
-#define EXC_REAL_OOL_MASKABLE(name, start, size) \
+#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \
__EXC_REAL_OOL_MASKABLE(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE(name, start);
+ __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask);
#define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \
EXC_REAL_BEGIN(name, start, size); \
@@ -332,13 +331,13 @@ name:
#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
__EXC_REAL_OOL(name, start, size);
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec) \
+#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \
TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \
+ MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
+#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \
__EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE_HV(name, start);
+ __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask);
#define __EXC_VIRT_OOL(name, start, size) \
EXC_VIRT_BEGIN(name, start, size); \
@@ -356,13 +355,13 @@ name:
#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec) \
+#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \
TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec) \
+#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \
__EXC_VIRT_OOL_MASKABLE(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE(name, realvec);
+ __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask);
#define __EXC_VIRT_OOL_HV(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
@@ -378,13 +377,13 @@ name:
#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec) \
+#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \
TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec) \
+#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \
__EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec);
+ __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask);
#define TRAMP_KVM(area, n) \
TRAMP_KVM_BEGIN(do_kvm_##n); \
@@ -413,10 +412,6 @@ name:
EXC_COMMON_BEGIN(name); \
STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \
-#define EXC_COMMON_HV(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 85b7a1a21e22..9c14f7b5c46c 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
static inline void wait_for_subcore_guest_exit(void) { }
static inline void wait_for_tb_resync(void) { }
#endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 14c9d44f355b..1a4847f67ea8 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -47,8 +47,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd)
{
BUG_ON(!hugepd_ok(hpd));
#ifdef CONFIG_PPC_8xx
- return (pte_t *)__va(hpd_val(hpd) &
- ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK));
+ return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
#else
return (pte_t *)((hpd_val(hpd) &
~HUGEPD_SHIFT_MASK) | PD_HUGE);
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3818fa0164f0..88e5e8f17e98 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -27,6 +27,15 @@
#define PACA_IRQ_DEC 0x08 /* Or FIT */
#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */
#define PACA_IRQ_HMI 0x20
+#define PACA_IRQ_PMI 0x40
+
+/*
+ * flags for paca->irq_soft_mask
+ */
+#define IRQS_ENABLED 0
+#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */
+#define IRQS_PMI_DISABLED 2
+#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED)
#endif /* CONFIG_PPC64 */
@@ -43,46 +52,112 @@ extern void unknown_exception(struct pt_regs *regs);
#ifdef CONFIG_PPC64
#include <asm/paca.h>
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long irq_soft_mask_return(void)
{
unsigned long flags;
asm volatile(
"lbz %0,%1(13)"
: "=r" (flags)
- : "i" (offsetof(struct paca_struct, soft_enabled)));
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)));
+
+ return flags;
+}
+
+/*
+ * The "memory" clobber acts as both a compiler barrier
+ * for the critical section and as a clobber because
+ * we changed paca->irq_soft_mask
+ */
+static inline notrace void irq_soft_mask_set(unsigned long mask)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ /*
+ * The irq mask must always include the STD bit if any are set.
+ *
+ * and interrupts don't get replayed until the standard
+ * interrupt (local_irq_disable()) is unmasked.
+ *
+ * Other masks must only provide additional masking beyond
+ * the standard, and they are also not replayed until the
+ * standard interrupt becomes unmasked.
+ *
+ * This could be changed, but it will require partial
+ * unmasks to be replayed, among other things. For now, take
+ * the simple approach.
+ */
+ WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+ asm volatile(
+ "stb %0,%1(13)"
+ :
+ : "r" (mask),
+ "i" (offsetof(struct paca_struct, irq_soft_mask))
+ : "memory");
+}
+
+static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
+{
+ unsigned long flags;
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+ asm volatile(
+ "lbz %0,%1(13); stb %2,%1(13)"
+ : "=&r" (flags)
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+ "r" (mask)
+ : "memory");
return flags;
}
-static inline unsigned long arch_local_irq_disable(void)
+static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
{
- unsigned long flags, zero;
+ unsigned long flags, tmp;
asm volatile(
- "li %1,0; lbz %0,%2(13); stb %1,%2(13)"
- : "=r" (flags), "=&r" (zero)
- : "i" (offsetof(struct paca_struct, soft_enabled))
+ "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)"
+ : "=&r" (flags), "=r" (tmp)
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+ "r" (mask)
: "memory");
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED));
+#endif
+
return flags;
}
+static inline unsigned long arch_local_save_flags(void)
+{
+ return irq_soft_mask_return();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+ irq_soft_mask_set(IRQS_DISABLED);
+}
+
extern void arch_local_irq_restore(unsigned long);
static inline void arch_local_irq_enable(void)
{
- arch_local_irq_restore(1);
+ arch_local_irq_restore(IRQS_ENABLED);
}
static inline unsigned long arch_local_irq_save(void)
{
- return arch_local_irq_disable();
+ return irq_soft_mask_set_return(IRQS_DISABLED);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
- return flags == 0;
+ return flags & IRQS_DISABLED;
}
static inline bool arch_irqs_disabled(void)
@@ -90,6 +165,55 @@ static inline bool arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * To support disabling and enabling of irq with PMI, set of
+ * new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore()
+ * functions are added. These macros are implemented using generic
+ * linux local_irq_* code from include/linux/irqflags.h.
+ */
+#define raw_local_irq_pmu_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = irq_soft_mask_or_return(IRQS_DISABLED | \
+ IRQS_PMI_DISABLED); \
+ } while(0)
+
+#define raw_local_irq_pmu_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ arch_local_irq_restore(flags); \
+ } while(0)
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ trace_hardirqs_off(); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ if (raw_irqs_disabled_flags(flags)) { \
+ raw_local_irq_pmu_restore(flags); \
+ trace_hardirqs_off(); \
+ } else { \
+ trace_hardirqs_on(); \
+ raw_local_irq_pmu_restore(flags); \
+ } \
+ } while(0)
+#else
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ raw_local_irq_pmu_restore(flags); \
+ } while (0)
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#endif /* CONFIG_PPC_BOOK3S */
+
#ifdef CONFIG_PPC_BOOK3E
#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory")
#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory")
@@ -98,14 +222,13 @@ static inline bool arch_irqs_disabled(void)
#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1)
#endif
-#define hard_irq_disable() do { \
- u8 _was_enabled; \
- __hard_irq_disable(); \
- _was_enabled = local_paca->soft_enabled; \
- local_paca->soft_enabled = 0; \
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
- if (_was_enabled) \
- trace_hardirqs_off(); \
+#define hard_irq_disable() do { \
+ unsigned long flags; \
+ __hard_irq_disable(); \
+ flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
+ if (!arch_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while(0)
static inline bool lazy_irq_pending(void)
@@ -127,7 +250,7 @@ static inline void may_hard_irq_enable(void)
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
- return !regs->softe;
+ return (regs->softe & IRQS_DISABLED);
}
extern bool prep_irq_for_idle(void);
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index fad0e6ff460f..d76cb11be3e3 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -35,6 +35,13 @@
#define THREAD_IMC_ENABLE 0x8000000000000000ULL
/*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET 0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET 8
+#define IMC_CNTL_BLK_MODE_OFFSET 32
+
+/*
* Structure to hold memory address information for imc units.
*/
struct imc_mem_info {
@@ -71,7 +78,7 @@ struct imc_events {
struct imc_pmu {
struct pmu pmu;
struct imc_mem_info *mem_info;
- struct imc_events **events;
+ struct imc_events *events;
/*
* Attribute groups for the PMU. Slot 0 used for
* format attribute, slot 1 used for cpusmask attribute,
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index 1aeb5f13b8c4..1a6c1ce17735 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -47,14 +47,14 @@
* be clobbered.
*/
#define RECONCILE_IRQ_STATE(__rA, __rB) \
- lbz __rA,PACASOFTIRQEN(r13); \
+ lbz __rA,PACAIRQSOFTMASK(r13); \
lbz __rB,PACAIRQHAPPENED(r13); \
- cmpwi cr0,__rA,0; \
- li __rA,0; \
+ andi. __rA,__rA,IRQS_DISABLED; \
+ li __rA,IRQS_DISABLED; \
ori __rB,__rB,PACA_IRQ_HARD_DIS; \
stb __rB,PACAIRQHAPPENED(r13); \
- beq 44f; \
- stb __rA,PACASOFTIRQEN(r13); \
+ bne 44f; \
+ stb __rA,PACAIRQSOFTMASK(r13); \
TRACE_DISABLE_INTS; \
44:
@@ -64,9 +64,9 @@
#define RECONCILE_IRQ_STATE(__rA, __rB) \
lbz __rA,PACAIRQHAPPENED(r13); \
- li __rB,0; \
+ li __rB,IRQS_DISABLED; \
ori __rA,__rA,PACA_IRQ_HARD_DIS; \
- stb __rB,PACASOFTIRQEN(r13); \
+ stb __rB,PACAIRQSOFTMASK(r13); \
stb __rA,PACAIRQHAPPENED(r13)
#endif
#endif
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 4419d435639a..9dcbfa6bbb91 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -73,6 +73,8 @@ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
master to copy new code to 0 */
extern int crashing_cpu;
extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
+extern void crash_ipi_callback(struct pt_regs *);
+extern int crash_wake_offline;
struct kimage;
struct pt_regs;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 941c2a3f231b..9db18287b5f4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void)
/* Only need to enable IRQs by hard enabling them after this */
local_paca->irq_happened = 0;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
#endif
}
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index 600a68bd77f5..fdd00939270b 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -2,76 +2,64 @@
#ifndef _ARCH_POWERPC_LOCAL_H
#define _ARCH_POWERPC_LOCAL_H
+#ifdef CONFIG_PPC_BOOK3S_64
+
#include <linux/percpu.h>
#include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
typedef struct
{
- atomic_long_t a;
+ long v;
} local_t;
-#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
-
-#define local_read(l) atomic_long_read(&(l)->a)
-#define local_set(l,i) atomic_long_set(&(l)->a, (i))
+#define LOCAL_INIT(i) { (i) }
-#define local_add(i,l) atomic_long_add((i),(&(l)->a))
-#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
-#define local_inc(l) atomic_long_inc(&(l)->a)
-#define local_dec(l) atomic_long_dec(&(l)->a)
-
-static __inline__ long local_add_return(long a, local_t *l)
+static __inline__ long local_read(local_t *l)
{
- long t;
-
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\
- add %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
+ return READ_ONCE(l->v);
}
-#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
-
-static __inline__ long local_sub_return(long a, local_t *l)
+static __inline__ void local_set(local_t *l, long i)
{
- long t;
+ WRITE_ONCE(l->v, i);
+}
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\
- subf %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
+#define LOCAL_OP(op, c_op) \
+static __inline__ void local_##op(long i, local_t *l) \
+{ \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ l->v c_op i; \
+ powerpc_local_irq_pmu_restore(flags); \
+}
- return t;
+#define LOCAL_OP_RETURN(op, c_op) \
+static __inline__ long local_##op##_return(long a, local_t *l) \
+{ \
+ long t; \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ t = (l->v c_op a); \
+ powerpc_local_irq_pmu_restore(flags); \
+ \
+ return t; \
}
-static __inline__ long local_inc_return(local_t *l)
-{
- long t;
+#define LOCAL_OPS(op, c_op) \
+ LOCAL_OP(op, c_op) \
+ LOCAL_OP_RETURN(op, c_op)
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
- return t;
-}
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+#define local_inc_return(l) local_add_return(1LL, l)
+#define local_inc(l) local_inc_return(l)
/*
* local_inc_and_test - increment and test
@@ -81,28 +69,39 @@ static __inline__ long local_inc_return(local_t *l)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define local_inc_and_test(l) (local_inc_return(l) == 0)
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
-static __inline__ long local_dec_return(local_t *l)
+#define local_dec_return(l) local_sub_return(1LL, l)
+#define local_dec(l) local_dec_return(l)
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
{
long t;
+ unsigned long flags;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ if (t == o)
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
return t;
}
-#define local_cmpxchg(l, o, n) \
- (cmpxchg_local(&((l)->a.counter), (o), (n)))
-#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+static __inline__ long local_xchg(local_t *l, long n)
+{
+ long t;
+ unsigned long flags;
+
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
+
+ return t;
+}
/**
* local_add_unless - add unless the number is a given value
@@ -115,62 +114,35 @@ static __inline__ long local_dec_return(local_t *l)
*/
static __inline__ int local_add_unless(local_t *l, long a, long u)
{
- long t;
-
- __asm__ __volatile__ (
-"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\
- cmpw 0,%0,%3 \n\
- beq- 2f \n\
- add %0,%2,%0 \n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b \n"
-" subf %0,%2,%0 \n\
-2:"
- : "=&r" (t)
- : "r" (&(l->a.counter)), "r" (a), "r" (u)
- : "cc", "memory");
-
- return t != u;
-}
-
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
-#define local_dec_and_test(l) (local_dec_return((l)) == 0)
-
-/*
- * Atomically test *l and decrement if it is greater than 0.
- * The function returns the old value of *l minus 1.
- */
-static __inline__ long local_dec_if_positive(local_t *l)
-{
- long t;
+ unsigned long flags;
+ int ret = 0;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\
- cmpwi %0,1\n\
- addi %0,%0,-1\n\
- blt- 2f\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- "\n\
-2:" : "=&b" (t)
- : "r" (&(l->a.counter))
- : "cc", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ if (l->v != u) {
+ l->v += a;
+ ret = 1;
+ }
+ powerpc_local_irq_pmu_restore(flags);
- return t;
+ return ret;
}
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
/* Use these for per-cpu local_t variables: on some archs they are
* much more efficient than these naive implementations. Note they take
* a variable, not an address.
*/
-#define __local_inc(l) ((l)->a.counter++)
-#define __local_dec(l) ((l)->a.counter++)
-#define __local_add(i,l) ((l)->a.counter+=(i))
-#define __local_sub(i,l) ((l)->a.counter-=(i))
+#define __local_inc(l) ((l)->v++)
+#define __local_dec(l) ((l)->v++)
+#define __local_add(i,l) ((l)->v+=(i))
+#define __local_sub(i,l) ((l)->v-=(i))
+
+#else /* CONFIG_PPC64 */
+
+#include <asm-generic/local.h>
+
+#endif /* CONFIG_PPC64 */
#endif /* _ARCH_POWERPC_LOCAL_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index cd2fc1cc1cc7..ffe7c71e1132 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -173,11 +173,19 @@ struct machdep_calls {
/* Called after scan and before resource survey */
void (*pcibios_fixup_phb)(struct pci_controller *hose);
+ /*
+ * Called after device has been added to bus and
+ * before sysfs has been created.
+ */
+ void (*pcibios_bus_add_device)(struct pci_dev *pdev);
+
resource_size_t (*pcibios_default_alignment)(void);
#ifdef CONFIG_PCI_IOV
void (*pcibios_fixup_sriov)(struct pci_dev *pdev);
resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno);
+ int (*pcibios_sriov_enable)(struct pci_dev *pdev, u16 num_vfs);
+ int (*pcibios_sriov_disable)(struct pci_dev *pdev);
#endif /* CONFIG_PCI_IOV */
/* Called to shutdown machine specific hardware not already controlled
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 30922f699341..07e3f54de9e3 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,6 +13,7 @@
#include <asm/cputable.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <asm/cpu_has_feature.h>
/*
@@ -22,13 +23,23 @@
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey)
{
- return (prot & PROT_SAO) ? VM_SAO : 0;
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
+#else
+ return ((prot & PROT_SAO) ? VM_SAO : 0);
+#endif
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
{
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (vm_flags & VM_SAO) ?
+ __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
+ __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
+#else
return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#endif
}
#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 5bb3dbede41a..2f806e329648 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -29,17 +29,25 @@
#define MI_Kp 0x40000000 /* Should always be set */
/*
- * All pages' PP exec bits are set to 000, which means Execute for Supervisor
- * and no Execute for User.
- * Then we use the APG to say whether accesses are according to Page rules,
- * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone)
- * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER
- * 0 (00) => Not User, no exec => 11 (all accesses performed as user)
- * 1 (01) => User but no exec => 11 (all accesses performed as user)
- * 2 (10) => Not User, exec => 01 (rights according to page definition)
- * 3 (11) => User, exec => 00 (all accesses performed as supervisor)
- */
-#define MI_APG_INIT 0xf4ffffff
+ * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC
+ * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means
+ * respectively NA for All or X for Supervisor and no access for User.
+ * Then we use the APG to say whether accesses are according to Page rules or
+ * "all Supervisor" rules (Access to all)
+ * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
+ * When that bit is not set access is done iaw "all user"
+ * which means no access iaw page rules.
+ * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
+ * 0x => No access => 11 (all accesses performed as user iaw page definition)
+ * 10 => No user => 01 (all accesses performed according to page definition)
+ * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * We define all 16 groups so that all other bits of APG can take any value
+ */
+#ifdef CONFIG_SWAP
+#define MI_APG_INIT 0xf4f4f4f4
+#else
+#define MI_APG_INIT 0x44444444
+#endif
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
@@ -102,17 +110,25 @@
#define MD_Kp 0x40000000 /* Should always be set */
/*
- * All pages' PP data bits are set to either 000 or 011, which means
+ * All pages' PP data bits are set to either 000 or 011 or 001, which means
* respectively RW for Supervisor and no access for User, or RO for
- * Supervisor and no access for user.
+ * Supervisor and no access for user and NA for ALL.
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
- * Therefore, we define 2 APG groups. lsb is _PAGE_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
- * 1 => User => 00 (all accesses performed as supervisor
- * according to page definition)
- */
-#define MD_APG_INIT 0x4fffffff
+ * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
+ * When that bit is not set access is done iaw "all user"
+ * which means no access iaw page rules.
+ * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
+ * 0x => No access => 11 (all accesses performed as user iaw page definition)
+ * 10 => No user => 01 (all accesses performed according to page definition)
+ * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * We define all 16 groups so that all other bits of APG can take any value
+ */
+#ifdef CONFIG_SWAP
+#define MD_APG_INIT 0xf4f4f4f4
+#else
+#define MD_APG_INIT 0x44444444
+#endif
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
@@ -164,6 +180,12 @@
*/
#define SPRN_M_TW 799
+/* APGs */
+#define M_APG0 0x00000000
+#define M_APG1 0x00000020
+#define M_APG2 0x00000040
+#define M_APG3 0x00000060
+
#ifndef __ASSEMBLY__
typedef struct {
unsigned int id;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 6364f5c2cc3e..bb38312cff28 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void)
}
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address);
+#else
+static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#endif /* !__ASSEMBLY__ */
/* The kernel use the constants below to index in the page sizes array.
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index e2a2b8400490..051b3d63afe3 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -187,11 +187,33 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
+#ifdef CONFIG_PPC_MEM_KEYS
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign);
+#else /* CONFIG_PPC_MEM_KEYS */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
}
+
+#define pkey_mm_init(mm)
+#define thread_pkey_regs_save(thread)
+#define thread_pkey_regs_restore(new_thread, old_thread)
+#define thread_pkey_regs_init(thread)
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+ return 0x0UL;
+}
+
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h
index 0e23cd4ac8aa..13e6702ec458 100644
--- a/arch/powerpc/include/asm/mpic_timer.h
+++ b/arch/powerpc/include/asm/mpic_timer.h
@@ -29,17 +29,17 @@ struct mpic_timer {
#ifdef CONFIG_MPIC_TIMER
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time);
+ time64_t time);
void mpic_start_timer(struct mpic_timer *handle);
void mpic_stop_timer(struct mpic_timer *handle);
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time);
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time);
void mpic_free_timer(struct mpic_timer *handle);
#else
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time) { return NULL; }
+ time64_t time) { return NULL; }
void mpic_start_timer(struct mpic_timer *handle) { }
void mpic_stop_timer(struct mpic_timer *handle) { }
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { }
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { }
void mpic_free_timer(struct mpic_timer *handle) { }
#endif
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index e97f58689ca7..9c80939b4d14 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -4,10 +4,6 @@
#ifdef CONFIG_PPC_WATCHDOG
extern void arch_touch_nmi_watchdog(void);
-extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
- bool exclude_self);
-#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
-
#else
static inline void arch_touch_nmi_watchdog(void) {}
#endif
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index d072139ff2e5..29d37bd1f3b3 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -61,7 +61,8 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pte_page)
{
- *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
+ *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_USER |
+ _PMD_PRESENT);
}
#define pmd_pgtable(pmd) pmd_page(pmd)
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index cc2bfec3aa3b..504a3c36ce5c 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -282,7 +282,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
- unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+ unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA);
pte_update(ptep, clr, set);
}
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index 6dc0180fd5c7..f04cb46ae8a1 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -31,37 +31,34 @@
/* Definitions for 8xx embedded chips. */
#define _PAGE_PRESENT 0x0001 /* Page is valid */
#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
-#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */
-#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */
+#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */
+#define _PAGE_HUGE 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
#define _PAGE_DIRTY 0x0100 /* C: page changed */
/* These 4 software bits must be masked out when the L2 entry is loaded
* into the TLB.
*/
#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */
-#define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */
-#define _PAGE_EXEC 0x0040 /* Copied to L1 APG */
-#define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */
-#define _PAGE_ACCESSED 0x0800 /* software: page referenced */
+#define _PAGE_SPECIAL 0x0020 /* SW entry */
+#define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */
+#define _PAGE_ACCESSED 0x0080 /* software: page referenced */
+#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
#define _PMD_PRESENT 0x0001
-#define _PMD_BAD 0x0ff0
+#define _PMD_BAD 0x0fd0
#define _PMD_PAGE_MASK 0x000c
#define _PMD_PAGE_8M 0x000c
#define _PMD_PAGE_512K 0x0004
+#define _PMD_USER 0x0020 /* APG 1 */
/* Until my rework is finished, 8xx still needs atomic PTE updates */
#define PTE_ATOMIC_UPDATES 1
-/* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO)
-#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC)
-#define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
- _PAGE_HWWRITE)
-#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
- _PAGE_HWWRITE | _PAGE_EXEC)
+#ifdef CONFIG_PPC_16K_PAGES
+#define _PAGE_PSIZE _PAGE_HUGE
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 5c68f4a59f75..c56de1e8026f 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -45,6 +45,29 @@ static inline int pte_present(pte_t pte)
return pte_val(pte) & _PAGE_PRESENT;
}
+/*
+ * We only find page table entry in the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ unsigned long pteval = pte_val(pte);
+ /*
+ * A read-only access is controlled by _PAGE_USER bit.
+ * We have _PAGE_READ set for WRITE and EXECUTE
+ */
+ unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
+
+ if (write)
+ need_pte_bits |= _PAGE_WRITE;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return false;
+
+ return true;
+}
+
/* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*
@@ -103,7 +126,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
static inline pte_t pte_mkhuge(pte_t pte)
{
- return pte;
+ return __pte(pte_val(pte) | _PAGE_HUGE);
}
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -212,8 +235,10 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre
#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
_PAGE_COHERENT))
+#if _PAGE_WRITETHRU != 0
#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
_PAGE_COHERENT | _PAGE_WRITETHRU))
+#endif
#define pgprot_cached_noncoherent(prot) \
(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index 2da4532ca377..ccee8eb509bb 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -55,6 +55,7 @@
#define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
#define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX)
#define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
+#define _PAGE_PRIVILEGED (_PAGE_BAP_SR)
#define _PAGE_HASHPTE 0
#define _PAGE_BUSY 0
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..24c73f5575ee 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,10 @@
#define OPAL_SET_POWER_SHIFT_RATIO 155
#define OPAL_SENSOR_GROUP_CLEAR 156
#define OPAL_PCI_SET_P2P 157
-#define OPAL_LAST 157
+#define OPAL_NPU_SPA_SETUP 159
+#define OPAL_NPU_SPA_CLEAR_CACHE 160
+#define OPAL_NPU_TL_SET 161
+#define OPAL_LAST 161
/* Device tree flags */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..12e70fb58700 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -34,6 +34,12 @@ int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
uint64_t bdf);
int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
uint64_t lpcr);
+int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
+ uint64_t addr, uint64_t PE_mask);
+int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
+ uint64_t PE_handle);
+int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
+ uint64_t rate_phys, uint32_t size);
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 23ac7fc0af23..b62c31037cad 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -159,7 +159,7 @@ struct paca_struct {
u64 saved_r1; /* r1 save for RTAS calls or PM */
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
- u8 soft_enabled; /* irq soft-enable flag */
+ u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
@@ -239,8 +239,7 @@ struct paca_struct {
*/
u64 exrfi[EX_SIZE] __aligned(0x80);
void *rfi_flush_fallback_area;
- u64 l1d_flush_congruence;
- u64 l1d_flush_sets;
+ u64 l1d_flush_size;
#endif
};
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 62ed83db04ae..94d449031b18 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -197,25 +197,22 @@ struct pci_dn {
struct iommu_table_group *table_group; /* for phb's or bridges */
int pci_ext_config_space; /* for pci devices */
-
- struct pci_dev *pcidev; /* back-pointer to the pci device */
#ifdef CONFIG_EEH
struct eeh_dev *edev; /* eeh device */
#endif
#define IODA_INVALID_PE 0xFFFFFFFF
-#ifdef CONFIG_PPC_POWERNV
unsigned int pe_number;
- int vf_index; /* VF index in the PF */
#ifdef CONFIG_PCI_IOV
+ int vf_index; /* VF index in the PF */
u16 vfs_expanded; /* number of VFs IOV BAR expanded */
u16 num_vfs; /* number of VFs enabled*/
unsigned int *pe_num_map; /* PE# for the first VF PE or array */
bool m64_single_mode; /* Use M64 BAR in Single Mode */
#define IODA_INVALID_M64 (-1)
- int (*m64_map)[PCI_SRIOV_NUM_BARS];
+ int (*m64_map)[PCI_SRIOV_NUM_BARS]; /* Only used on powernv */
+ int last_allow_rc; /* Only used on pseries */
#endif /* CONFIG_PCI_IOV */
int mps; /* Maximum Payload Size */
-#endif
struct list_head child_list;
struct list_head list;
struct resource holes[PCI_SRIOV_NUM_BARS];
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 8dc32eacc97c..d82802ff5088 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -121,6 +121,8 @@ extern int remove_phb_dynamic(struct pci_controller *phb);
extern struct pci_dev *of_create_pci_dev(struct device_node *node,
struct pci_bus *bus, int devfn);
+extern unsigned int pci_parse_of_flags(u32 addr0, int bridge);
+
extern void of_scan_pci_bridge(struct pci_dev *dev);
extern void of_scan_bus(struct device_node *node, struct pci_bus *bus);
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
new file mode 100644
index 000000000000..0409c80c32c0
--- /dev/null
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_KEYS_H
+#define _ASM_POWERPC_KEYS_H
+
+#include <linux/jump_label.h>
+#include <asm/firmware.h>
+
+DECLARE_STATIC_KEY_TRUE(pkey_disabled);
+extern int pkeys_total; /* total pkeys as per device tree */
+extern u32 initial_allocation_mask; /* bits set for reserved keys */
+
+/*
+ * Define these here temporarily so we're not dependent on patching linux/mm.h.
+ * Once it's updated we can drop these.
+ */
+#ifndef VM_PKEY_BIT0
+# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0
+# define VM_PKEY_BIT0 VM_HIGH_ARCH_0
+# define VM_PKEY_BIT1 VM_HIGH_ARCH_1
+# define VM_PKEY_BIT2 VM_HIGH_ARCH_2
+# define VM_PKEY_BIT3 VM_HIGH_ARCH_3
+# define VM_PKEY_BIT4 VM_HIGH_ARCH_4
+#endif
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
+ VM_PKEY_BIT3 | VM_PKEY_BIT4)
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | \
+ PKEY_DISABLE_WRITE | \
+ PKEY_DISABLE_EXECUTE)
+
+static inline u64 pkey_to_vmflag_bits(u16 pkey)
+{
+ return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS);
+}
+
+static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0x0UL;
+
+ return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL));
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+ return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+#define arch_max_pkey() pkeys_total
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+ return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL));
+}
+
+static inline u16 pte_to_pkey_bits(u64 pteflags)
+{
+ return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL));
+}
+
+#define pkey_alloc_mask(pkey) (0x1 << pkey)
+
+#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
+
+#define __mm_pkey_allocated(mm, pkey) { \
+ mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_free(mm, pkey) { \
+ mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_is_allocated(mm, pkey) \
+ (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey))
+
+#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \
+ pkey_alloc_mask(pkey))
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+ /* A reserved key is never considered as 'explicitly allocated' */
+ return ((pkey < arch_max_pkey()) &&
+ !__mm_pkey_is_reserved(pkey) &&
+ __mm_pkey_is_allocated(mm, pkey));
+}
+
+extern void __arch_activate_pkey(int pkey);
+extern void __arch_deactivate_pkey(int pkey);
+/*
+ * Returns a positive, 5-bit key on success, or -1 on failure.
+ * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and
+ * mm_pkey_free().
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+ /*
+ * Note: this is the one and only place we make sure that the pkey is
+ * valid as far as the hardware is concerned. The rest of the kernel
+ * trusts that only good, valid pkeys come out of here.
+ */
+ u32 all_pkeys_mask = (u32)(~(0x0));
+ int ret;
+
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ /*
+ * Are we out of pkeys? We must handle this specially because ffz()
+ * behavior is undefined if there are no zeros.
+ */
+ if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+ return -1;
+
+ ret = ffz((u32)mm_pkey_allocation_map(mm));
+ __mm_pkey_allocated(mm, ret);
+
+ /*
+ * Enable the key in the hardware
+ */
+ if (ret > 0)
+ __arch_activate_pkey(ret);
+ return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ if (!mm_pkey_is_allocated(mm, pkey))
+ return -EINVAL;
+
+ /*
+ * Disable the key in the hardware
+ */
+ __arch_deactivate_pkey(pkey);
+ __mm_pkey_free(mm, pkey);
+
+ return 0;
+}
+
+/*
+ * Try to dedicate one of the protection keys to be used as an
+ * execute-only protection key.
+ */
+extern int __execute_only_pkey(struct mm_struct *mm);
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ return __execute_only_pkey(mm);
+}
+
+extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey);
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+
+ /*
+ * Is this an mprotect_pkey() call? If so, never override the value that
+ * came from the user.
+ */
+ if (pkey != -1)
+ return pkey;
+
+ return __arch_override_mprotect_pkey(vma, prot, pkey);
+}
+
+extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -EINVAL;
+ return __arch_set_user_pkey_access(tsk, pkey, init_val);
+}
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return !static_branch_likely(&pkey_disabled);
+}
+
+extern void pkey_mm_init(struct mm_struct *mm);
+extern bool arch_supports_pkeys(int cap);
+extern unsigned int arch_usable_pkeys(void);
+extern void thread_pkey_regs_save(struct thread_struct *thread);
+extern void thread_pkey_regs_restore(struct thread_struct *new_thread,
+ struct thread_struct *old_thread);
+extern void thread_pkey_regs_init(struct thread_struct *thread);
+#endif /*_ASM_POWERPC_KEYS_H */
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
new file mode 100644
index 000000000000..f6945d3bc971
--- /dev/null
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _ASM_PNV_OCXL_H
+#define _ASM_PNV_OCXL_H
+
+#include <linux/pci.h>
+
+#define PNV_OCXL_TL_MAX_TEMPLATE 63
+#define PNV_OCXL_TL_BITS_PER_RATE 4
+#define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8)
+
+extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+ u16 *supported);
+extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
+
+extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+ char *rate_buf, int rate_buf_size);
+extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+ uint64_t rate_buf_phys, int rate_buf_size);
+
+extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
+extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+ void __iomem *tfc, void __iomem *pe_handle);
+extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+ void __iomem **dar, void __iomem **tfc,
+ void __iomem **pe_handle);
+
+extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+ void **platform_data);
+extern void pnv_ocxl_spa_release(void *platform_data);
+extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle);
+
+extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
+extern void pnv_ocxl_free_xive_irq(u32 irq);
+
+#endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ce0930d68857..ab5c1588b487 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -236,6 +236,7 @@
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
#define PPC_INST_RFMCI 0x4c00004c
+#define PPC_INST_MFSPR 0x7c0002a6
#define PPC_INST_MFSPR_DSCR 0x7c1102a6
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
@@ -383,6 +384,7 @@
#define __PPC_ME64(s) __PPC_MB64(s)
#define __PPC_BI(s) (((s) & 0x1f) << 16)
#define __PPC_CT(t) (((t) & 0x0f) << 21)
+#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bdab3b74eb98..01299cdc9806 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -309,6 +309,11 @@ struct thread_struct {
struct thread_vr_state ckvr_state; /* Checkpointed VR state */
unsigned long ckvrsave; /* Checkpointed VRSAVE */
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_MEM_KEYS
+ unsigned long amr;
+ unsigned long iamr;
+ unsigned long uamor;
+#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 825bd5998701..b04c5ce8191b 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -80,21 +80,20 @@ extern void of_instantiate_rtc(void);
extern int of_get_ibm_chip_id(struct device_node *np);
-/* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
- */
-struct of_drconf_cell {
- u64 base_addr;
- u32 drc_index;
- u32 reserved;
- u32 aa_index;
- u32 flags;
+struct of_drc_info {
+ char *drc_type;
+ char *drc_name_prefix;
+ u32 drc_index_start;
+ u32 drc_name_suffix_start;
+ u32 num_sequential_elems;
+ u32 sequential_inc;
+ u32 drc_power_domain;
+ u32 last_drc_index;
};
-#define DRCONF_MEM_ASSIGNED 0x00000008
-#define DRCONF_MEM_AI_INVALID 0x00000040
-#define DRCONF_MEM_RESERVED 0x00000080
+extern int of_read_drc_info_cell(struct property **prop,
+ const __be32 **curval, struct of_drc_info *data);
+
/*
* There are two methods for telling firmware what our capabilities are.
@@ -159,6 +158,7 @@ struct of_drconf_cell {
#define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */
#define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */
#define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */
+#define OV5_DRMEM_V2 0x1680 /* ibm,dynamic-reconfiguration-v2 */
#define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */
#define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */
#define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */
@@ -175,6 +175,7 @@ struct of_drconf_cell {
#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */
/* Radix Table Extensions */
#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */
+#define OV5_DRC_INFO 0x1640 /* Redef Prop Structures: drc-info */
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index ce142ef99ba7..c4a72c7a8c83 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -8,9 +8,6 @@
#ifndef _PAGE_HASHPTE
#define _PAGE_HASHPTE 0
#endif
-#ifndef _PAGE_SHARED
-#define _PAGE_SHARED 0
-#endif
#ifndef _PAGE_HWWRITE
#define _PAGE_HWWRITE 0
#endif
@@ -45,6 +42,20 @@
#ifndef _PAGE_PTE
#define _PAGE_PTE 0
#endif
+/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */
+#ifndef _PAGE_PRIVILEGED
+#define _PAGE_PRIVILEGED 0
+#else
+#ifndef _PAGE_USER
+#define _PAGE_USER 0
+#endif
+#endif
+#ifndef _PAGE_NA
+#define _PAGE_NA 0
+#endif
+#ifndef _PAGE_HUGE
+#define _PAGE_HUGE 0
+#endif
#ifndef _PMD_PRESENT_MASK
#define _PMD_PRESENT_MASK _PMD_PRESENT
@@ -53,17 +64,22 @@
#define _PMD_SIZE 0
#define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE()
#endif
+#ifndef _PMD_USER
+#define _PMD_USER 0
+#endif
#ifndef _PAGE_KERNEL_RO
-#define _PAGE_KERNEL_RO (_PAGE_RO)
+#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO)
#endif
#ifndef _PAGE_KERNEL_ROX
-#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO)
+#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
#endif
#ifndef _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
+ _PAGE_HWWRITE)
#endif
#ifndef _PAGE_KERNEL_RWX
-#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
+ _PAGE_HWWRITE | _PAGE_EXEC)
#endif
#ifndef _PAGE_HPTEFLAGS
#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
@@ -85,7 +101,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
*/
static inline bool pte_user(pte_t pte)
{
- return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
+ return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
}
#endif /* __ASSEMBLY__ */
@@ -115,7 +131,8 @@ static inline bool pte_user(pte_t pte)
/* Mask of bits returned by pte_pgprot() */
#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
_PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
- _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \
+ _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
+ _PAGE_PRIVILEGED | \
_PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
/*
@@ -142,7 +159,7 @@ static inline bool pte_user(pte_t pte)
*
* Note due to the way vm flags are laid out, the bits are XWR
*/
-#define PAGE_NONE __pgprot(_PAGE_BASE)
+#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA)
#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
_PAGE_EXEC)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3ccd412..e6c7eadf6bce 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -312,7 +312,6 @@
DSISR_BAD_EXT_CTRL)
#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \
DSISR_ATTR_CONFLICT | \
- DSISR_KEYFAULT | \
DSISR_UNSUPP_MMU | \
DSISR_PRTABLE_FAULT | \
DSISR_ICSWX_NO_CT | \
@@ -432,8 +431,9 @@
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
-#define SPRN_HMER 0x150 /* Hardware m? error recovery */
-#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
+#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
+#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
+#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
index 53a7e2955d3e..7192eece6c3e 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -66,86 +66,4 @@
#define DC_DFWT 0x40000000 /* Data cache is forced write through */
#define DC_LES 0x20000000 /* Caches are little endian mode */
-#ifdef CONFIG_8xx_CPU6
-#define do_mtspr_cpu6(rn, rn_addr, v) \
- do { \
- int _reg_cpu6 = rn_addr, _tmp_cpu6; \
- asm volatile("stw %0, %1;" \
- "lwz %0, %1;" \
- "mtspr " __stringify(rn) ",%2" : \
- : "r" (_reg_cpu6), "m"(_tmp_cpu6), \
- "r" ((unsigned long)(v)) \
- : "memory"); \
- } while (0)
-
-#define do_mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \
- : "r" ((unsigned long)(v)) \
- : "memory")
-#define mtspr(rn, v) \
- do { \
- if (rn == SPRN_IMMR) \
- do_mtspr_cpu6(rn, 0x3d30, v); \
- else if (rn == SPRN_IC_CST) \
- do_mtspr_cpu6(rn, 0x2110, v); \
- else if (rn == SPRN_IC_ADR) \
- do_mtspr_cpu6(rn, 0x2310, v); \
- else if (rn == SPRN_IC_DAT) \
- do_mtspr_cpu6(rn, 0x2510, v); \
- else if (rn == SPRN_DC_CST) \
- do_mtspr_cpu6(rn, 0x3110, v); \
- else if (rn == SPRN_DC_ADR) \
- do_mtspr_cpu6(rn, 0x3310, v); \
- else if (rn == SPRN_DC_DAT) \
- do_mtspr_cpu6(rn, 0x3510, v); \
- else if (rn == SPRN_MI_CTR) \
- do_mtspr_cpu6(rn, 0x2180, v); \
- else if (rn == SPRN_MI_AP) \
- do_mtspr_cpu6(rn, 0x2580, v); \
- else if (rn == SPRN_MI_EPN) \
- do_mtspr_cpu6(rn, 0x2780, v); \
- else if (rn == SPRN_MI_TWC) \
- do_mtspr_cpu6(rn, 0x2b80, v); \
- else if (rn == SPRN_MI_RPN) \
- do_mtspr_cpu6(rn, 0x2d80, v); \
- else if (rn == SPRN_MI_CAM) \
- do_mtspr_cpu6(rn, 0x2190, v); \
- else if (rn == SPRN_MI_RAM0) \
- do_mtspr_cpu6(rn, 0x2390, v); \
- else if (rn == SPRN_MI_RAM1) \
- do_mtspr_cpu6(rn, 0x2590, v); \
- else if (rn == SPRN_MD_CTR) \
- do_mtspr_cpu6(rn, 0x3180, v); \
- else if (rn == SPRN_M_CASID) \
- do_mtspr_cpu6(rn, 0x3380, v); \
- else if (rn == SPRN_MD_AP) \
- do_mtspr_cpu6(rn, 0x3580, v); \
- else if (rn == SPRN_MD_EPN) \
- do_mtspr_cpu6(rn, 0x3780, v); \
- else if (rn == SPRN_M_TWB) \
- do_mtspr_cpu6(rn, 0x3980, v); \
- else if (rn == SPRN_MD_TWC) \
- do_mtspr_cpu6(rn, 0x3b80, v); \
- else if (rn == SPRN_MD_RPN) \
- do_mtspr_cpu6(rn, 0x3d80, v); \
- else if (rn == SPRN_M_TW) \
- do_mtspr_cpu6(rn, 0x3f80, v); \
- else if (rn == SPRN_MD_CAM) \
- do_mtspr_cpu6(rn, 0x3190, v); \
- else if (rn == SPRN_MD_RAM0) \
- do_mtspr_cpu6(rn, 0x3390, v); \
- else if (rn == SPRN_MD_RAM1) \
- do_mtspr_cpu6(rn, 0x3590, v); \
- else if (rn == SPRN_DEC) \
- do_mtspr_cpu6(rn, 0x2c00, v); \
- else if (rn == SPRN_TBWL) \
- do_mtspr_cpu6(rn, 0x3880, v); \
- else if (rn == SPRN_TBWU) \
- do_mtspr_cpu6(rn, 0x3a80, v); \
- else if (rn == SPRN_DPDR) \
- do_mtspr_cpu6(rn, 0x2d30, v); \
- else \
- do_mtspr(rn, v); \
- } while (0)
-#endif
-
#endif /* _ASM_POWERPC_REG_8xx_H */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 449912f057f6..d61f9c96d916 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -389,3 +389,6 @@ COMPAT_SYS_SPU(preadv2)
COMPAT_SYS_SPU(pwritev2)
SYSCALL(kexec_file_load)
SYSCALL(statx)
+SYSCALL(pkey_alloc)
+SYSCALL(pkey_free)
+SYSCALL(pkey_mprotect)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 9ba11dbcaca9..daf1ba97a00c 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,14 +12,10 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 384
+#define NR_syscalls 387
#define __NR__exit __NR_exit
-#define __IGNORE_pkey_mprotect
-#define __IGNORE_pkey_alloc
-#define __IGNORE_pkey_free
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -10,6 +10,41 @@
#define _ASM_POWERPC_XIVE_REGS_H
/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
+#define XIVE_ESB_GET 0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P 0x2
+#define XIVE_ESB_VAL_Q 0x1
+
+/*
* Thread Management (aka "TM") registers
*/
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..7624e22f5045 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
#define XIVE_IRQ_FLAG_EOI_FW 0x10
#define XIVE_IRQ_FLAG_H_INT_ESB 0x20
+/* Special flag set by KVM for excalation interrupts */
+#define XIVE_IRQ_NO_EOI 0x80
+
#define XIVE_INVALID_CHIP_ID -1
/* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
atomic_t pending_count;
};
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI 0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
-#define XIVE_ESB_GET 0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P 0x2
-#define XIVE_ESB_VAL_Q 0x1
-
/* Global enable flags for the XIVE support */
extern bool __xive_enabled;
@@ -154,7 +122,7 @@ static inline bool xive_enabled(void) { return false; }
static inline bool xive_spapr_init(void) { return false; }
static inline bool xive_native_init(void) { return false; }
static inline void xive_smp_probe(void) { }
-extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
+static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
static inline void xive_smp_setup_cpu(void) { }
static inline void xive_smp_disable_cpu(void) { }
static inline void xive_kexec_teardown_cpu(int secondary) { }
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 5f201d40bcca..860c59291bfc 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -97,6 +97,7 @@
#define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */
#define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */
#define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */
+#define ELF_NPKEY 3 /* includes amr, iamr, uamor */
typedef unsigned long elf_greg_t64;
typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index e63bc37e33af..65065ce32814 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -30,4 +30,10 @@
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE |\
+ PKEY_DISABLE_EXECUTE)
#endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index df8684f31919..389c36fd8299 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -395,5 +395,8 @@
#define __NR_pwritev2 381
#define __NR_kexec_file_load 382
#define __NR_statx 383
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+#define __NR_pkey_mprotect 386
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f390d57cf2e1..88b84ac76b53 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -178,7 +178,7 @@ int main(void)
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
- OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled);
+ OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
#ifdef CONFIG_PPC_BOOK3S
OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
@@ -239,8 +239,7 @@ int main(void)
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
OFFSET(PACA_EXRFI, paca_struct, exrfi);
- OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
- OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+ OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size);
#endif
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
@@ -401,6 +400,8 @@ int main(void)
/* Other bits used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+ DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 679bbe714e85..3f30c994e931 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -31,7 +31,6 @@ _GLOBAL(__setup_cpu_power7)
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
- bl __init_tlb_power7
mtlr r11
blr
@@ -45,7 +44,6 @@ _GLOBAL(__restore_cpu_power7)
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
- bl __init_tlb_power7
mtlr r11
blr
@@ -64,7 +62,6 @@ _GLOBAL(__setup_cpu_power8)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA206
bl __init_HFSCR
- bl __init_tlb_power8
bl __init_PMU_HV
bl __init_PMU_HV_ISA207
mtlr r11
@@ -86,7 +83,6 @@ _GLOBAL(__restore_cpu_power8)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA206
bl __init_HFSCR
- bl __init_tlb_power8
bl __init_PMU_HV
bl __init_PMU_HV_ISA207
mtlr r11
@@ -111,7 +107,6 @@ _GLOBAL(__setup_cpu_power9)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA300
bl __init_HFSCR
- bl __init_tlb_power9
bl __init_PMU_HV
mtlr r11
blr
@@ -136,7 +131,6 @@ _GLOBAL(__restore_cpu_power9)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA300
bl __init_HFSCR
- bl __init_tlb_power9
bl __init_PMU_HV
mtlr r11
blr
@@ -194,50 +188,6 @@ __init_HFSCR:
mtspr SPRN_HFSCR,r3
blr
-/*
- * Clear the TLB using the specified IS form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
- */
-__init_tlb_power7:
- li r6,POWER7_TLB_SETS
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-__init_tlb_power8:
- li r6,POWER8_TLB_SETS
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-/*
- * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process
- * and one for partition scope to clear process and partition table entries.
- */
-__init_tlb_power9:
- li r6,POWER9_TLB_SETS_HASH - 1
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- li r8,0
- ptesync
- PPC_TLBIEL(7, 8, 2, 1, 0)
- PPC_TLBIEL(7, 8, 2, 0, 0)
-2: addi r7,r7,0x1000
- PPC_TLBIEL(7, 8, 0, 0, 0)
- bdnz 2b
- ptesync
-1: blr
-
__init_PMU_HV:
li r5,0
mtspr SPRN_MMCRC,r5
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1350f49d81a8..c40a9fc1e5d1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -74,9 +74,6 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power9(void);
-extern void __flush_tlb_power7(unsigned int action);
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -368,7 +365,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
@@ -386,7 +382,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -404,7 +399,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.platform = "power9",
},
{ /* Power7 */
@@ -423,7 +417,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
@@ -443,7 +436,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7+",
},
@@ -463,7 +455,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -483,7 +474,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -503,7 +493,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -523,7 +512,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -543,7 +531,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
@@ -563,7 +550,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
@@ -583,7 +569,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index cbabb5adccd9..00b215125d3e 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -44,6 +44,14 @@
#define REAL_MODE_TIMEOUT 10000
static int time_to_dump;
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since we want to avoid things from happening when an
+ * offline CPU wakes up due to something like an HMI (malfunction error),
+ * which propagates to all threads.
+ */
+int crash_wake_offline;
#define CRASH_HANDLER_MAX 3
/* List of shutdown handles */
@@ -63,15 +71,12 @@ static int handle_fault(struct pt_regs *regs)
#ifdef CONFIG_SMP
static atomic_t cpus_in_crash;
-static void crash_ipi_callback(struct pt_regs *regs)
+void crash_ipi_callback(struct pt_regs *regs)
{
static cpumask_t cpus_state_saved = CPU_MASK_NONE;
int cpu = smp_processor_id();
- if (!cpu_online(cpu))
- return;
-
hard_irq_disable();
if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
crash_save_cpu(regs, cpu);
@@ -109,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu)
printk(KERN_EMERG "Sending IPI to other CPUs\n");
+ if (crash_wake_offline)
+ ncpus = num_present_cpus() - 1;
+
crash_send_ipi(crash_ipi_callback);
smp_wmb();
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8bdc2f96c5d6..945e2c29ad2d 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -77,8 +77,6 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -92,27 +90,6 @@ static struct {
static void (*init_pmu_registers)(void);
-static void cpufeatures_flush_tlb(void)
-{
- /*
- * This is a temporary measure to keep equivalent TLB flush as the
- * cputable based setup code.
- */
- switch (PVR_VER(mfspr(SPRN_PVR))) {
- case PVR_POWER8:
- case PVR_POWER8E:
- case PVR_POWER8NVL:
- __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL);
- break;
- case PVR_POWER9:
- __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL);
- break;
- default:
- pr_err("unknown CPU version for boot TLB flush\n");
- break;
- }
-}
-
static void __restore_cpu_cpufeatures(void)
{
/*
@@ -137,8 +114,6 @@ static void __restore_cpu_cpufeatures(void)
if (init_pmu_registers)
init_pmu_registers();
-
- cpufeatures_flush_tlb();
}
static char dt_cpu_name[64];
@@ -157,7 +132,6 @@ static struct cpu_spec __initdata base_cpu_spec = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = NULL,
.cpu_restore = __restore_cpu_cpufeatures,
- .flush_tlb = NULL,
.machine_check_early = NULL,
.platform = NULL,
};
@@ -412,7 +386,6 @@ static void init_pmu_power8(void)
static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
{
cur_cpu_spec->platform = "power8";
- cur_cpu_spec->flush_tlb = __flush_tlb_power8;
cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
return 1;
@@ -451,7 +424,6 @@ static void init_pmu_power9(void)
static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
{
cur_cpu_spec->platform = "power9";
- cur_cpu_spec->flush_tlb = __flush_tlb_power9;
cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
return 1;
@@ -752,8 +724,6 @@ static void __init cpufeatures_setup_finished(void)
system_registers.hfscr = mfspr(SPRN_HFSCR);
system_registers.fscr = mfspr(SPRN_FSCR);
- cpufeatures_flush_tlb();
-
pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index cbca0a667682..cc649809885e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -740,6 +740,65 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
return NULL;
}
+int eeh_restore_vf_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 devctl, cmd, cap2, aer_capctl;
+ int old_mps;
+
+ if (edev->pcie_cap) {
+ /* Restore MPS */
+ old_mps = (ffs(pdn->mps) - 8) << 5;
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+ devctl |= old_mps;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+
+ /* Disable Completion Timeout */
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
+ 4, &cap2);
+ if (cap2 & 0x10) {
+ eeh_ops->read_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, &cap2);
+ cap2 |= 0x10;
+ eeh_ops->write_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, cap2);
+ }
+ }
+
+ /* Enable SERR and parity checking */
+ eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
+ cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+ eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
+
+ /* Enable report various errors */
+ if (edev->pcie_cap) {
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_CERE;
+ devctl |= (PCI_EXP_DEVCTL_NFERE |
+ PCI_EXP_DEVCTL_FERE |
+ PCI_EXP_DEVCTL_URRE);
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+ }
+
+ /* Enable ECRC generation and check */
+ if (edev->pcie_cap && edev->aer_cap) {
+ eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, &aer_capctl);
+ aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+ eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, aer_capctl);
+ }
+
+ return 0;
+}
+
/**
* pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 4f71e4c9beb7..beea2182d754 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata)
edev->in_error = true;
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
return NULL;
}
@@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata)
driver->err_handler->resume(dev);
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+#ifdef CONFIG_PCI_IOV
+ eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+#endif
return NULL;
}
@@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata)
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
return NULL;
}
@@ -440,7 +446,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
return NULL;
}
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
#endif
return NULL;
@@ -496,7 +502,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
(*removed)++;
if (edev->physfn) {
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 797549289798..deed906dd8f1 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -48,7 +48,7 @@ static ssize_t eeh_show_##_name(struct device *dev, \
\
return sprintf(buf, _format "\n", edev->_memb); \
} \
-static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
+static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
@@ -90,6 +90,65 @@ static ssize_t eeh_pe_state_store(struct device *dev,
static DEVICE_ATTR_RW(eeh_pe_state);
+#ifdef CONFIG_PCI_IOV
+static ssize_t eeh_notify_resume_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!edev || !edev->pe)
+ return -ENODEV;
+
+ pdn = pci_get_pdn(pdev);
+ return sprintf(buf, "%d\n", pdn->last_allow_rc);
+}
+
+static ssize_t eeh_notify_resume_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+ if (!edev || !edev->pe || !eeh_ops->notify_resume)
+ return -ENODEV;
+
+ if (eeh_ops->notify_resume(pci_get_pdn(pdev)))
+ return -EIO;
+
+ return count;
+}
+static DEVICE_ATTR_RW(eeh_notify_resume);
+
+static int eeh_notify_resume_add(struct pci_dev *pdev)
+{
+ struct device_node *np;
+ int rc = 0;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+
+ return rc;
+}
+
+static void eeh_notify_resume_remove(struct pci_dev *pdev)
+{
+ struct device_node *np;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+}
+#else
+static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
+static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
+#endif /* CONFIG_PCI_IOV */
+
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
@@ -104,6 +163,7 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
+ rc += eeh_notify_resume_add(pdev);
if (rc)
pr_warn("EEH: Unable to create sysfs entries\n");
@@ -129,6 +189,8 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
+ eeh_notify_resume_remove(pdev);
+
if (edev)
edev->mode &= ~EEH_DEV_SYSFS;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e780e1fbf6c2..eb8d01bae8c6 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -211,7 +211,7 @@ transfer_to_handler_cont:
mflr r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -301,7 +301,7 @@ stack_ovf:
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r9
@@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
lwz r7,_NIP(r1)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r7
@@ -727,7 +727,7 @@ fast_exception_return:
lwz r10,_LINK(r11)
mtlr r10
REST_GPR(10, r11)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR1,r9
@@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r12
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2748584b767d..2cb5109a7ea3 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -133,10 +133,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
* of irq tracing is used, we additionally check that condition
* is correct
*/
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
- lbz r10,PACASOFTIRQEN(r13)
- xori r10,r10,1
-1: tdnei r10,0
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ lbz r10,PACAIRQSOFTMASK(r13)
+1: tdnei r10,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
@@ -152,7 +151,7 @@ system_call: /* label this so stack traces look sane */
/* We do need to set SOFTE in the stack frame or the return
* from interrupt will be painful
*/
- li r10,1
+ li r10,IRQS_ENABLED
std r10,SOFTE(r1)
CURRENT_THREAD_INFO(r11, r1)
@@ -755,10 +754,10 @@ resume_kernel:
beq+ restore
/* Check that preempt_count() == 0 and interrupts are enabled */
lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
+ cmpwi cr0,r8,0
+ bne restore
ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
+ andi. r0,r0,IRQS_DISABLED
bne restore
/*
@@ -796,12 +795,12 @@ restore:
* are about to re-enable interrupts
*/
ld r5,SOFTE(r1)
- lbz r6,PACASOFTIRQEN(r13)
- cmpwi cr0,r5,0
- beq .Lrestore_irq_off
+ lbz r6,PACAIRQSOFTMASK(r13)
+ andi. r5,r5,IRQS_DISABLED
+ bne .Lrestore_irq_off
/* We are enabling, were we already enabled ? Yes, just return */
- cmpwi cr0,r6,1
+ andi. r6,r6,IRQS_DISABLED
beq cr0,.Ldo_restore
/*
@@ -820,8 +819,8 @@ restore:
*/
.Lrestore_no_replay:
TRACE_ENABLE_INTS
- li r0,1
- stb r0,PACASOFTIRQEN(r13);
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13);
/*
* Final return path. BookE is handled in a different file
@@ -939,9 +938,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 1f
rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
stb r7,PACAIRQHAPPENED(r13)
-1: li r0,0
- stb r0,PACASOFTIRQEN(r13);
- TRACE_DISABLE_INTS
+1:
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ /* The interrupt should not have soft enabled. */
+ lbz r7,PACAIRQSOFTMASK(r13)
+1: tdeqi r7,IRQS_ENABLED
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
b .Ldo_restore
/*
@@ -979,6 +982,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
addi r3,r1,STACK_FRAME_OVERHEAD;
bl do_IRQ
b ret_from_except
+1: cmpwi cr0,r3,0xf00
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl performance_monitor_exception
+ b ret_from_except
1: cmpwi cr0,r3,0xe60
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
@@ -1055,15 +1063,15 @@ _GLOBAL(enter_rtas)
li r0,0
mtcr r0
-#ifdef CONFIG_BUG
+#ifdef CONFIG_BUG
/* There is no way it is acceptable to get here with interrupts enabled,
* check it with the asm equivalent of WARN_ON
*/
- lbz r0,PACASOFTIRQEN(r13)
-1: tdnei r0,0
+ lbz r0,PACAIRQSOFTMASK(r13)
+1: tdeqi r0,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
-
+
/* Hard-disable interrupts */
mfmsr r6
rldicl r7,r6,48,1
@@ -1107,6 +1115,17 @@ __enter_rtas:
rtas_return_loc:
FIXUP_ENDIAN
+ /*
+ * Clear RI and set SF before anything.
+ */
+ mfmsr r6
+ li r0,MSR_RI
+ andc r6,r6,r0
+ sldi r0,r0,(MSR_SF_LG - MSR_RI_LG)
+ or r6,r6,r0
+ sync
+ mtmsrd r6
+
/* relocation is off at this point */
GET_PACA(r4)
clrldi r4,r4,2 /* convert to realmode address */
@@ -1115,12 +1134,6 @@ rtas_return_loc:
0: mflr r3
ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
- mfmsr r6
- li r0,MSR_RI
- andc r6,r6,r0
- sync
- mtmsrd r6
-
ld r1,PACAR1(r4) /* Restore our SP */
ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index acd8ca76233e..ee832d344a5a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r10,SPRN_ESR
SPECIAL_EXC_STORE(r10,ESR)
- lbz r10,PACASOFTIRQEN(r13)
+ lbz r10,PACAIRQSOFTMASK(r13)
SPECIAL_EXC_STORE(r10,SOFTE)
ld r10,_NIP(r1)
SPECIAL_EXC_STORE(r10,CSRR0)
@@ -206,17 +206,17 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- lbz r6,PACASOFTIRQEN(r13)
+ lbz r6,PACAIRQSOFTMASK(r13)
ld r5,SOFTE(r1)
/* Interrupts had better not already be enabled... */
- twnei r6,0
+ tweqi r6,IRQS_ENABLED
- cmpwi cr0,r5,0
- beq 1f
+ andi. r6,r5,IRQS_DISABLED
+ bne 1f
TRACE_ENABLE_INTS
- stb r5,PACASOFTIRQEN(r13)
+ stb r5,PACAIRQSOFTMASK(r13)
1:
/*
* Restore PACAIRQHAPPENED rather than setting it based on
@@ -351,9 +351,9 @@ ret_from_mc_except:
#define PROLOG_ADDITION_NONE_MC(n)
#define PROLOG_ADDITION_MASKABLE_GEN(n) \
- lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
- cmpwi cr0,r10,0; /* yes -> go out of line */ \
- beq masked_interrupt_book3e_##n
+ lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \
+ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
+ bne masked_interrupt_book3e_##n
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
@@ -397,7 +397,7 @@ exc_##n##_common: \
mfspr r8,SPRN_XER; /* save XER in stackframe */ \
ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
- lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
+ lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \
ld r12,exception_marker@toc(r2); \
li r0,0; \
std r3,GPR10(r1); /* save r10 to stackframe */ \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2dc10bf646b8..243d072a225a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -718,10 +718,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
hardware_interrupt_hv:
BEGIN_FTR_SECTION
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_STD, SOFTEN_TEST_PR)
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
@@ -729,9 +731,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
.globl hardware_interrupt_relon_hv;
hardware_interrupt_relon_hv:
BEGIN_FTR_SECTION
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
@@ -827,8 +833,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
+EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -839,8 +845,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980)
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00)
+EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xa00)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
@@ -1052,7 +1058,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* mode.
*/
__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
-__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
+__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED)
EXC_VIRT_NONE(0x4e60, 0x20)
TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
TRAMP_REAL_BEGIN(hmi_exception_early)
@@ -1110,8 +1116,8 @@ EXC_COMMON_BEGIN(hmi_exception_common)
EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
+EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
@@ -1120,8 +1126,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0)
+EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1132,8 +1138,8 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL(performance_monitor, 0xf00, 0x20)
-EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00)
+EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
+EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xf00)
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
@@ -1345,7 +1351,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b .
#endif
-EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON(denorm_common, 0x1500, unknown_exception)
#ifdef CONFIG_CBE_RAS
@@ -1455,39 +1461,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
- std r12,PACA_EXRFI+EX_R12(r13)
- std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SETS(r13)
- ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
- addi r12,r12,8
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
-1: li r8,0
- .rept 8 /* 8-way set associative */
- ldx r11,r10,r8
- add r8,r8,r12
- xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
- add r8,r8,r11 // Add 0, this creates a dependency on the ldx
- .endr
- addi r10,r10,128 /* 128 byte cache line */
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
- ld r12,PACA_EXRFI+EX_R12(r13)
- ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
rfid
@@ -1497,39 +1501,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
- std r12,PACA_EXRFI+EX_R12(r13)
- std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SETS(r13)
- ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
- addi r12,r12,8
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
-1: li r8,0
- .rept 8 /* 8-way set associative */
- ldx r11,r10,r8
- add r8,r8,r12
- xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
- add r8,r8,r11 // Add 0, this creates a dependency on the ldx
- .endr
- addi r10,r10,128 /* 128 byte cache line */
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
- ld r12,PACA_EXRFI+EX_R12(r13)
- ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
hrfid
@@ -1632,7 +1634,7 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
- lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
+ lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
@@ -1828,6 +1830,8 @@ BEGIN_FTR_SECTION
FTR_SECTION_ELSE
beq hardware_interrupt_common
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
+ cmpwi r3,0xf00
+ beq performance_monitor_common
BEGIN_FTR_SECTION
cmpwi r3,0xa00
beq h_doorbell_common_msgclr
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index aa71a90f5222..a61151a6ea5e 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -765,8 +765,8 @@ _GLOBAL(pmac_secondary_start)
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -822,7 +822,8 @@ __secondary_start:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- stb r7,PACASOFTIRQEN(r13)
+ li r7,IRQS_DISABLED
+ stb r7,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -988,8 +989,8 @@ start_here_common:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4fee00d414e8..d8670a37d70c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -33,23 +33,6 @@
#include <asm/fixmap.h>
#include <asm/export.h>
-/* Macro to make the code more readable. */
-#ifdef CONFIG_8xx_CPU6
-#define SPRN_MI_TWC_ADDR 0x2b80
-#define SPRN_MI_RPN_ADDR 0x2d80
-#define SPRN_MD_TWC_ADDR 0x3b80
-#define SPRN_MD_RPN_ADDR 0x3d80
-
-#define MTSPR_CPU6(spr, reg, treg) \
- li treg, spr##_ADDR; \
- stw treg, 12(r0); \
- lwz treg, 12(r0); \
- mtspr spr, reg
-#else
-#define MTSPR_CPU6(spr, reg, treg) \
- mtspr spr, reg
-#endif
-
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
#define SIMPLE_KERNEL_ADDRESS 1
@@ -69,11 +52,7 @@
* Value for the bits that have fixed value in RPN entries.
* Also used for tagging DAR for DTLBerror.
*/
-#ifdef CONFIG_PPC_16K_PAGES
-#define RPN_PATTERN (0x00f0 | MD_SPS16K)
-#else
#define RPN_PATTERN 0x00f0
-#endif
#define PAGE_SHIFT_512K 19
#define PAGE_SHIFT_8M 23
@@ -134,15 +113,12 @@ turn_on_mmu:
* task's thread_struct.
*/
#define EXCEPTION_PROLOG \
- EXCEPTION_PROLOG_0; \
+ mtspr SPRN_SPRG_SCRATCH0, r10; \
+ mtspr SPRN_SPRG_SCRATCH1, r11; \
mfcr r10; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
-#define EXCEPTION_PROLOG_0 \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11
-
#define EXCEPTION_PROLOG_1 \
mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
andi. r11,r11,MSR_PR; \
@@ -177,13 +153,6 @@ turn_on_mmu:
SAVE_2GPRS(7, r11)
/*
- * Exception exit code.
- */
-#define EXCEPTION_EPILOG_0 \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- mfspr r11,SPRN_SPRG_SCRATCH1
-
-/*
* Note: code which follows this uses cr0.eq (set if from kernel),
* r11, r12 (SRR0), and r9 (SRR1).
*
@@ -326,15 +295,10 @@ SystemCall:
#endif
InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtspr SPRN_SPRG_SCRATCH2, r3
-#endif
- EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
- lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mtspr SPRN_SPRG_SCRATCH2, r12
#endif
/* If we are faulting a kernel address, we have to use the
@@ -345,7 +309,7 @@ InstructionTLBMiss:
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfcr r3
+ mfcr r12
#endif
#ifdef ITLB_MISS_KERNEL
#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
@@ -388,40 +352,46 @@ _ENTRY(ITLBMiss_cmp)
lwz r10, 0(r10) /* Get the pte */
4:
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtcr r3
+ mtcr r12
#endif
- /* Insert the APG into the TWC from the Linux PTE. */
- rlwimi r11, r10, 0, 25, 26
- /* Load the MI_TWC with the attributes for this "segment." */
- MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 1, MI_SPS16K
-#endif
#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
+ rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
#endif
- li r11, RPN_PATTERN
+ /* Load the MI_TWC with the attributes for this "segment." */
+ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
+
+ li r11, RPN_PATTERN | 0x200
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 20-23 and 28 must be clear.
- * Software indicator bits 24, 25, 26, and 27 must be
+ * Software indicator bits 20 and 23 must be clear.
+ * Software indicator bits 22, 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */
-#else
- rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */
-#endif
- MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
+ rlwimi r11, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
+ rlwimi r10, r11, 0, 0x0ff0 /* Set 22, 24-27, clear 20,23 */
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfspr r3, SPRN_SPRG_SCRATCH2
+_ENTRY(itlb_miss_exit_1)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mfspr r12, SPRN_SPRG_SCRATCH2
+#endif
+ rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(itlb_miss_perf)
+ lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+ lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+ addi r11, r11, 1
+ stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mfspr r12, SPRN_SPRG_SCRATCH2
#endif
- EXCEPTION_EPILOG_0
rfi
#ifdef CONFIG_HUGETLB_PAGE
@@ -436,7 +406,6 @@ _ENTRY(ITLBMiss_cmp)
rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
#endif
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
20: /* 512k pages */
@@ -445,21 +414,15 @@ _ENTRY(ITLBMiss_cmp)
/* Add level 2 base */
rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
#endif
. = 0x1200
DataStoreTLBMiss:
- mtspr SPRN_SPRG_SCRATCH2, r3
- EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
- lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
-#endif
- mfcr r3
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
+ mtspr SPRN_SPRG_SCRATCH2, r12
+ mfcr r12
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
@@ -499,59 +462,49 @@ _ENTRY(DTLBMiss_jmp)
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
4:
- mtcr r3
+ mtcr r12
- /* Insert the Guarded flag and APG into the TWC from the Linux PTE.
- * It is bit 26-27 of both the Linux PTE and the TWC (at least
+ /* Insert the Guarded flag into the TWC from the Linux PTE.
+ * It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, 26, 27
- /* Insert the WriteThru flag into the TWC from the Linux PTE.
- * It is bit 25 in the Linux PTE and bit 30 in the TWC
- */
- rlwimi r11, r10, 32-5, 30, 30
- MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
-
- /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29)
- * In 16k pages mode, SPS is always 1 */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 1, MD_SPS16K
-#endif
- /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
- * We also need to know if the insn is a load/store, so:
- * Clear _PAGE_PRESENT and load that which will
- * trap into DTLB Error with store bit set accordinly.
- */
- /* PRESENT=0x1, ACCESSED=0x20
- * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
- * r10 = (r10 & ~PRESENT) | r11;
- */
+ rlwimi r11, r10, 0, _PAGE_GUARDED
#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
+ /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0
+ * on that bit will represent a Non Access group
+ */
+ rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
#endif
+ mtspr SPRN_MD_TWC, r11
+
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 22 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
li r11, RPN_PATTERN
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */
-#else
- rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
-#endif
- rlwimi r10, r11, 0, 20, 20 /* clear 20 */
- MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
/* Restore registers */
- mfspr r3, SPRN_SPRG_SCRATCH2
mtspr SPRN_DAR, r11 /* Tag DAR */
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_1)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
+ rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(dtlb_miss_perf)
+ lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+ lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+ addi r11, r11, 1
+ stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifdef CONFIG_HUGETLB_PAGE
@@ -566,7 +519,6 @@ _ENTRY(DTLBMiss_jmp)
rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
#endif
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
20: /* 512k pages */
@@ -575,7 +527,6 @@ _ENTRY(DTLBMiss_jmp)
/* Add level 2 base */
rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
#endif
@@ -601,7 +552,8 @@ itlbie:
*/
. = 0x1400
DataTLBError:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_DAR
@@ -636,7 +588,8 @@ dtlbie:
*/
. = 0x1c00
DataBreakpoint:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_SRR0
cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l
@@ -652,13 +605,15 @@ DataBreakpoint:
EXC_XFER_EE(0x1c00, do_break)
11:
mtcr r10
- EXCEPTION_EPILOG_0
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
rfi
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
. = 0x1d00
InstructionBreakpoint:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
lis r10, (instruction_counter - PAGE_OFFSET)@ha
lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, -1
@@ -666,7 +621,8 @@ InstructionBreakpoint:
lis r10, 0xffff
ori r10, r10, 0x01
mtspr SPRN_COUNTA, r10
- EXCEPTION_EPILOG_0
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
rfi
#else
EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
@@ -681,51 +637,57 @@ InstructionBreakpoint:
* not enough space in the DataStoreTLBMiss area.
*/
DTLBMissIMMR:
- mtcr r3
- /* Set 512k byte guarded page and mark it valid */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID
- MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
+ mtcr r12
+ /* Set 512k byte guarded page and mark it valid and accessed */
+ li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+ mtspr SPRN_MD_TWC, r10
mfspr r10, SPRN_IMMR /* Get current IMMR */
rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT | _PAGE_NO_CACHE
- MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_2)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
DTLBMissLinear:
- mtcr r3
- /* Set 8M byte page and mark it valid */
- li r11, MD_PS8MEG | MD_SVALID
- MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
+ mtcr r12
+ /* Set 8M byte page and mark it valid and accessed */
+ li r11, MD_PS8MEG | MD_SVALID | M_APG2
+ mtspr SPRN_MD_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT
- MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_3)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifndef CONFIG_PIN_TLB_TEXT
ITLBMissLinear:
- mtcr r3
- /* Set 8M byte page and mark it valid */
- li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
- MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+ mtcr r12
+ /* Set 8M byte page and mark it valid,accessed */
+ li r11, MI_PS8MEG | MI_SVALID | M_APG2
+ mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT
- MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(itlb_miss_exit_2)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#endif
@@ -933,13 +895,6 @@ start_here:
*/
lis r6, swapper_pg_dir@ha
tophys(r6,r6)
-#ifdef CONFIG_8xx_CPU6
- lis r4, cpu6_errata_word@h
- ori r4, r4, cpu6_errata_word@l
- li r3, 0x3f80
- stw r3, 12(r4)
- lwz r3, 12(r4)
-#endif
mtspr SPRN_M_TW, r6
lis r4,2f@h
ori r4,r4,2f@l
@@ -1004,8 +959,8 @@ initial_mmu:
lis r8, KERNELBASE@h /* Create vaddr for TLB */
ori r8, r8, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r8
- li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */
- ori r8, r8, MI_SVALID /* Make it valid */
+ li r8, MI_PS8MEG /* Set 8M byte page */
+ ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */
mtspr SPRN_MI_TWC, r8
li r8, MI_BOOTINIT /* Create RPN for address 0 */
mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
@@ -1032,7 +987,7 @@ initial_mmu:
ori r8, r8, MD_EVALID /* Mark it valid */
mtspr SPRN_MD_EPN, r8
li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID /* Make it valid */
+ ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */
mtspr SPRN_MD_TWC, r8
mr r8, r9 /* Create paddr for TLB */
ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
@@ -1061,7 +1016,7 @@ initial_mmu:
#endif
/* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
rlwinm r8, r8, 0, ~0xc
#else
rlwinm r8, r8, 0, ~0x8
@@ -1094,13 +1049,7 @@ swapper_pg_dir:
abatron_pteptrs:
.space 8
-#ifdef CONFIG_8xx_CPU6
- .globl cpu6_errata_word
-cpu6_errata_word:
- .space 16
-#endif
-
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
.globl itlb_miss_counter
itlb_miss_counter:
.space 4
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 48c21acef915..2b269315d377 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -17,6 +17,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/epapr_hcalls.h>
+#include <asm/hw_irq.h>
/* 64-bit version only for now */
#ifdef CONFIG_PPC64
@@ -46,8 +47,8 @@ _GLOBAL(\name)
bl trace_hardirqs_on
addi r1,r1,128
#endif
- li r0,1
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13)
/* Interrupts will make use return to LR, so get something we want
* in there
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index f57a19348bdd..08faa93755f9 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -15,6 +15,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/irqflags.h>
+#include <asm/hw_irq.h>
#undef DEBUG
@@ -53,8 +54,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
mfmsr r7
#endif /* CONFIG_TRACE_IRQFLAGS */
- li r0,1
- stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
BEGIN_FTR_SECTION
DSSALL
sync
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index b7a84522e652..f88038847790 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -67,6 +67,7 @@
#include <asm/smp.h>
#include <asm/livepatch.h>
#include <asm/asm-prototypes.h>
+#include <asm/hw_irq.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -106,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void)
return happened;
}
-static inline notrace void set_soft_enabled(unsigned long enable)
-{
- __asm__ __volatile__("stb %0,%1(13)"
- : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
-}
-
static inline notrace int decrementer_check_overflow(void)
{
u64 now = get_tb_or_rtc();
@@ -191,6 +186,11 @@ notrace unsigned int __check_irq_replay(void)
return 0x900;
}
+ if (happened & PACA_IRQ_PMI) {
+ local_paca->irq_happened &= ~PACA_IRQ_PMI;
+ return 0xf00;
+ }
+
if (happened & PACA_IRQ_EE) {
local_paca->irq_happened &= ~PACA_IRQ_EE;
return 0x500;
@@ -224,15 +224,16 @@ notrace unsigned int __check_irq_replay(void)
return 0;
}
-notrace void arch_local_irq_restore(unsigned long en)
+notrace void arch_local_irq_restore(unsigned long mask)
{
unsigned char irq_happened;
unsigned int replay;
/* Write the new soft-enabled value */
- set_soft_enabled(en);
- if (!en)
+ irq_soft_mask_set(mask);
+ if (mask)
return;
+
/*
* From this point onward, we can take interrupts, preempt,
* etc... unless we got hard-disabled. We check if an event
@@ -263,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long en)
*/
if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
__hard_irq_disable();
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
else {
/*
* We should already be hard disabled here. We had bugs
@@ -274,9 +275,9 @@ notrace void arch_local_irq_restore(unsigned long en)
if (WARN_ON(mfmsr() & MSR_EE))
__hard_irq_disable();
}
-#endif /* CONFIG_TRACE_IRQFLAGS */
+#endif
- set_soft_enabled(0);
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off();
/*
@@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en)
/* We can soft-enable now */
trace_hardirqs_on();
- set_soft_enabled(1);
+ irq_soft_mask_set(IRQS_ENABLED);
/*
* And replay if we have to. This will return with interrupts
@@ -363,7 +364,7 @@ bool prep_irq_for_idle(void)
* of entering the low power state.
*/
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
/* Tell the caller to enter the low power state */
return true;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 71e8a1b8c86e..efdd16a79075 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
return handled;
}
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+ DTRIG_UNKNOWN,
+ DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
+ DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
{
- __this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
- /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
- if (pvr_version_is(PVR_POWER9)) {
- unsigned long hmer = mfspr(SPRN_HMER);
-
- /* Do we have the debug bit set */
- if (hmer & PPC_BIT(17)) {
- hmer &= ~PPC_BIT(17);
- mtspr(SPRN_HMER, hmer);
-
- /*
- * Now to avoid problems with soft-disable we
- * only do the emulation if we are coming from
- * user space
- */
- if (user_mode(regs))
- local_paca->hmi_p9_special_emu = 1;
-
- /*
- * Don't bother going to OPAL if that's the
- * only relevant bit.
- */
- if (!(hmer & mfspr(SPRN_HMEER)))
- return local_paca->hmi_p9_special_emu;
+ int pvr;
+ struct device_node *cpun;
+ struct property *prop = NULL;
+ const char *str;
+
+ /* First look in the device tree */
+ preempt_disable();
+ cpun = of_get_cpu_node(smp_processor_id(), NULL);
+ if (cpun) {
+ of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+ prop, str) {
+ if (strcmp(str, "bit17-vector-ci-load") == 0)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
}
+ of_node_put(cpun);
+ }
+ preempt_enable();
+
+ /* If we found the property, don't look at PVR */
+ if (prop)
+ goto out;
+
+ pvr = mfspr(SPRN_PVR);
+ /* Check for POWER9 Nimbus (scale-out) */
+ if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+ /* DD2.2 and later */
+ if ((pvr & 0xfff) >= 0x202)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+ /* DD2.0 and DD2.1 - used for vector CI load emulation */
+ else if ((pvr & 0xfff) >= 0x200)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ }
+
+ out:
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ pr_debug("HMI debug trigger used for vector CI load\n");
+ break;
+ case DTRIG_SUSPEND_ESCAPE:
+ pr_debug("HMI debug trigger used for TM suspend escape\n");
+ break;
+ default:
+ break;
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+ return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ * 0 means no further handling is required
+ * 1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+ unsigned long hmer = mfspr(SPRN_HMER);
+ long ret = 0;
+
+ /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+ if (!((hmer & HMER_DEBUG_TRIG)
+ && hmer_debug_trig_function != DTRIG_UNKNOWN))
+ return -1;
+
+ hmer &= ~HMER_DEBUG_TRIG;
+ /* HMER is a write-AND register */
+ mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * host user space
+ */
+ if (regs && user_mode(regs))
+ ret = local_paca->hmi_p9_special_emu = 1;
+
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * See if any other HMI causes remain to be handled
+ */
+ if (hmer & mfspr(SPRN_HMEER))
+ return -1;
+
+ return ret;
+}
+
+/*
+ * Return values:
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+ int ret;
+
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
+ ret = hmi_handle_debugtrig(regs);
+ if (ret >= 0)
+ return ret;
wait_for_subcore_guest_exit();
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 644f7040b91c..fe6fc63251fe 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -58,115 +58,6 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
return pte_pfn(*ptep);
}
-static void flush_tlb_206(unsigned int num_sets, unsigned int action)
-{
- unsigned long rb;
- unsigned int i;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- rb = TLBIEL_INVAL_SET;
- break;
- case TLB_INVAL_SCOPE_LPID:
- rb = TLBIEL_INVAL_SET_LPID;
- break;
- default:
- BUG();
- break;
- }
-
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < num_sets; i++) {
- asm volatile("tlbiel %0" : : "r" (rb));
- rb += 1 << TLBIEL_INVAL_SET_SHIFT;
- }
- asm volatile("ptesync" : : : "memory");
-}
-
-static void flush_tlb_300(unsigned int num_sets, unsigned int action)
-{
- unsigned long rb;
- unsigned int i;
- unsigned int r;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- rb = TLBIEL_INVAL_SET;
- break;
- case TLB_INVAL_SCOPE_LPID:
- rb = TLBIEL_INVAL_SET_LPID;
- break;
- default:
- BUG();
- break;
- }
-
- asm volatile("ptesync" : : : "memory");
-
- if (early_radix_enabled())
- r = 1;
- else
- r = 0;
-
- /*
- * First flush table/PWC caches with set 0, then flush the
- * rest of the sets, partition scope. Radix must then do it
- * all again with process scope. Hash just has to flush
- * process table.
- */
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r));
- for (i = 1; i < num_sets; i++) {
- unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r));
- }
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb), "r"(0), "i"(2), "i"(1), "r"(r));
- if (early_radix_enabled()) {
- for (i = 1; i < num_sets; i++) {
- unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r));
- }
- }
-
- asm volatile("ptesync" : : : "memory");
-}
-
-/*
- * Generic routines to flush TLB on POWER processors. These routines
- * are used as flush_tlb hook in the cpu_spec.
- *
- * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
- * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
- */
-void __flush_tlb_power7(unsigned int action)
-{
- flush_tlb_206(POWER7_TLB_SETS, action);
-}
-
-void __flush_tlb_power8(unsigned int action)
-{
- flush_tlb_206(POWER8_TLB_SETS, action);
-}
-
-void __flush_tlb_power9(unsigned int action)
-{
- unsigned int num_sets;
-
- if (early_radix_enabled())
- num_sets = POWER9_TLB_SETS_RADIX;
- else
- num_sets = POWER9_TLB_SETS_HASH;
-
- flush_tlb_300(num_sets, action);
-}
-
-
/* flush SLBs and reload */
#ifdef CONFIG_PPC_BOOK3S_64
static void flush_and_reload_slb(void)
@@ -226,10 +117,8 @@ static int mce_flush(int what)
return 1;
}
if (what == MCE_FLUSH_TLB) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- return 1;
- }
+ tlbiel_all();
+ return 1;
}
return 0;
diff --git a/arch/powerpc/kernel/module.lds b/arch/powerpc/kernel/module.lds
new file mode 100644
index 000000000000..cea5dc124be4
--- /dev/null
+++ b/arch/powerpc/kernel/module.lds
@@ -0,0 +1,8 @@
+/* Force alignment of .toc section. */
+SECTIONS
+{
+ .toc 0 : ALIGN(256)
+ {
+ *(.got .toc)
+ }
+}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 218971ac7e04..a2636c250b7b 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -348,8 +348,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
char *p;
if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
me->arch.stubs_section = i;
- else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
me->arch.toc_section = i;
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ }
else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size);
@@ -387,12 +390,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return 0;
}
-/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
- gives the value maximum span in an instruction which uses a signed
- offset) */
+/*
+ * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the
+ * value maximum span in an instruction which uses a signed offset). Round down
+ * to a 256 byte boundary for the odd case where we are setting up r2 without a
+ * .toc section.
+ */
static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
{
- return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+ return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
}
/* Both low and high 16 bits are added as SIGNED additions, so if low
@@ -501,12 +507,22 @@ static bool is_early_mcount_callsite(u32 *instruction)
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
- if (is_early_mcount_callsite(instruction - 1))
+ u32 *prev_insn = instruction - 1;
+
+ if (is_early_mcount_callsite(prev_insn))
+ return 1;
+
+ /*
+ * Make sure the branch isn't a sibling call. Sibling calls aren't
+ * "link" branches and they don't return, so they don't need the r2
+ * restore afterwards.
+ */
+ if (!instr_is_relative_link_branch(*prev_insn))
return 1;
if (*instruction != PPC_INST_NOP) {
- pr_err("%s: Expect noop after relocate, got %08x\n",
- me->name, *instruction);
+ pr_err("%s: Expected nop after call, got %08x at %pS\n",
+ me->name, *instruction, instruction);
return 0;
}
/* ld r2,R2_STACK_OFFSET(r1) */
@@ -628,7 +644,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_PPC_REL24:
/* FIXME: Handle weak symbols here --RR */
- if (sym->st_shndx == SHN_UNDEF) {
+ if (sym->st_shndx == SHN_UNDEF ||
+ sym->st_shndx == SHN_LIVEPATCH) {
/* External: go via stub */
value = stub_for_addr(sechdrs, value, me);
if (!value)
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 52fc864cdec4..98a3aeeb3c8c 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -58,7 +58,7 @@ optprobe_template_entry:
std r5,_XER(r1)
mfcr r5
std r5,_CCR(r1)
- lbz r5,PACASOFTIRQEN(r13)
+ lbz r5,PACAIRQSOFTMASK(r13)
std r5,SOFTE(r1)
/*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index d6597038931d..95ffedf14885 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -18,6 +18,8 @@
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include "setup.h"
+
#ifdef CONFIG_PPC_BOOK3S
/*
@@ -208,15 +210,14 @@ void __init allocate_pacas(void)
u64 limit;
int cpu;
- limit = ppc64_rma_size;
-
#ifdef CONFIG_PPC_BOOK3S_64
/*
- * We can't take SLB misses on the paca, and we want to access them
- * in real mode, so allocate them within the RMA and also within
- * the first segment.
+ * We access pacas in real mode, and cannot take SLB faults
+ * on them when in virtual mode, so allocate them accordingly.
*/
- limit = min(0x10000000ULL, limit);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
+#else
+ limit = ppc64_rma_size;
#endif
paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 590f4d0a6cb1..208e623b2557 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -249,8 +249,31 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
return pci_iov_resource_size(pdev, resno);
}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ if (ppc_md.pcibios_sriov_enable)
+ return ppc_md.pcibios_sriov_enable(pdev, num_vfs);
+
+ return 0;
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+ if (ppc_md.pcibios_sriov_disable)
+ return ppc_md.pcibios_sriov_disable(pdev);
+
+ return 0;
+}
+
#endif /* CONFIG_PCI_IOV */
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ if (ppc_md.pcibios_bus_add_device)
+ ppc_md.pcibios_bus_add_device(pdev);
+}
+
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -1276,8 +1299,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
i + PCI_BRIDGE_RESOURCES) == 0)
continue;
}
- pr_warning("PCI: Cannot allocate resource region "
- "%d of PCI bridge %d, will remap\n", i, bus->number);
+ pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n",
+ i, bus->number);
clear_resource:
/* The resource might be figured out when doing
* reassignment based on the resources required
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 0e395afbf0f4..ab147a1909c8 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -156,10 +156,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
pdn->parent = parent;
pdn->busno = busno;
pdn->devfn = devfn;
-#ifdef CONFIG_PPC_POWERNV
pdn->vf_index = vf_index;
pdn->pe_number = IODA_INVALID_PE;
-#endif
INIT_LIST_HEAD(&pdn->child_list);
INIT_LIST_HEAD(&pdn->list);
list_add_tail(&pdn->list, &parent->child_list);
@@ -226,9 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev)
*/
if (pdev->is_virtfn) {
pdn = pci_get_pdn(pdev);
-#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
-#endif
return;
}
@@ -294,9 +290,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
return NULL;
dn->data = pdn;
pdn->phb = hose;
-#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
-#endif
regs = of_get_property(dn, "reg", NULL);
if (regs) {
u32 addr = of_read_number(regs, 1);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 0d790f8432d2..20ceec4a5f5e 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
*/
-static unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
unsigned int flags = 0;
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 56548bf6231f..9bfbd800d32f 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -63,7 +63,7 @@ static int __init proc_ppc64_init(void)
{
struct proc_dir_entry *pde;
- pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
+ pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
&page_map_fops, vdso_data);
if (!pde)
return 1;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4208cbe2fb7f..1738c4127b32 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -42,6 +42,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/uaccess.h>
#include <linux/elf-randomize.h>
+#include <linux/pkeys.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -57,6 +58,7 @@
#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
+#include <asm/hw_irq.h>
#endif
#include <asm/code-patching.h>
#include <asm/exec.h>
@@ -1097,6 +1099,8 @@ static inline void save_sprs(struct thread_struct *t)
t->tar = mfspr(SPRN_TAR);
}
#endif
+
+ thread_pkey_regs_save(t);
}
static inline void restore_sprs(struct thread_struct *old_thread,
@@ -1136,6 +1140,8 @@ static inline void restore_sprs(struct thread_struct *old_thread,
old_thread->tidr != new_thread->tidr)
mtspr(SPRN_TIDR, new_thread->tidr);
#endif
+
+ thread_pkey_regs_restore(new_thread, old_thread);
}
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1404,7 +1410,7 @@ void show_regs(struct pt_regs * regs)
print_msr_bits(regs->msr);
pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
- if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+ if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
if (trap == 0x200 || trap == 0x300 || trap == 0x600)
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
@@ -1504,14 +1510,15 @@ static int assign_thread_tidr(void)
{
int index;
int err;
+ unsigned long flags;
again:
if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
return -ENOMEM;
- spin_lock(&vas_thread_id_lock);
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
err = ida_get_new_above(&vas_thread_ida, 1, &index);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
if (err == -EAGAIN)
goto again;
@@ -1519,9 +1526,9 @@ again:
return err;
if (index > MAX_THREAD_CONTEXT) {
- spin_lock(&vas_thread_id_lock);
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
ida_remove(&vas_thread_ida, index);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
return -ENOMEM;
}
@@ -1530,9 +1537,11 @@ again:
static void free_thread_tidr(int id)
{
- spin_lock(&vas_thread_id_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
ida_remove(&vas_thread_ida, id);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
}
/*
@@ -1584,6 +1593,7 @@ int set_thread_tidr(struct task_struct *t)
return 0;
}
+EXPORT_SYMBOL_GPL(set_thread_tidr);
#endif /* CONFIG_PPC64 */
@@ -1669,7 +1679,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childregs->gpr[14] = ppc_function_entry((void *)usp);
#ifdef CONFIG_PPC64
clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = 1;
+ childregs->softe = IRQS_ENABLED;
#endif
childregs->gpr[15] = kthread_arg;
p->thread.regs = NULL; /* no user register state */
@@ -1860,6 +1870,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+ thread_pkey_regs_init(&current->thread);
}
EXPORT_SYMBOL(start_thread);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b15bae265c90..4dffef947b8a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -59,6 +59,7 @@
#include <asm/epapr_hcalls.h>
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
+#include <asm/drmem.h>
#include <mm/mmu_decl.h>
@@ -455,92 +456,74 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
#ifdef CONFIG_PPC_PSERIES
/*
- * Interpret the ibm,dynamic-memory property in the
- * /ibm,dynamic-reconfiguration-memory node.
+ * Interpret the ibm dynamic reconfiguration memory LMBs.
* This contains a list of memory blocks along with NUMA affinity
* information.
*/
-static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm)
{
- const __be32 *dm, *ls, *usm;
- int l;
- unsigned long n, flags;
- u64 base, size, memblock_size;
- unsigned int is_kexec_kdump = 0, rngs;
-
- ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
- if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
- return 0;
- memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+ u64 base, size;
+ int is_kexec_kdump = 0, rngs;
- dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
- if (dm == NULL || l < sizeof(__be32))
- return 0;
+ base = lmb->base_addr;
+ size = drmem_lmb_size();
+ rngs = 1;
- n = of_read_number(dm++, 1); /* number of entries */
- if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
- return 0;
+ /*
+ * Skip this block if the reserved bit is set in flags
+ * or if the block is not assigned to this partition.
+ */
+ if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+ !(lmb->flags & DRCONF_MEM_ASSIGNED))
+ return;
- /* check if this is a kexec/kdump kernel. */
- usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
- &l);
- if (usm != NULL)
+ if (*usm)
is_kexec_kdump = 1;
- for (; n != 0; --n) {
- base = dt_mem_next_cell(dt_root_addr_cells, &dm);
- flags = of_read_number(&dm[3], 1);
- /* skip DRC index, pad, assoc. list index, flags */
- dm += 4;
- /* skip this block if the reserved bit is set in flags
- or if the block is not assigned to this partition */
- if ((flags & DRCONF_MEM_RESERVED) ||
- !(flags & DRCONF_MEM_ASSIGNED))
- continue;
- size = memblock_size;
- rngs = 1;
+ if (is_kexec_kdump) {
+ /*
+ * For each memblock in ibm,dynamic-memory, a
+ * corresponding entry in linux,drconf-usable-memory
+ * property contains a counter 'p' followed by 'p'
+ * (base, size) duple. Now read the counter from
+ * linux,drconf-usable-memory property
+ */
+ rngs = dt_mem_next_cell(dt_root_size_cells, usm);
+ if (!rngs) /* there are no (base, size) duple */
+ return;
+ }
+
+ do {
if (is_kexec_kdump) {
- /*
- * For each memblock in ibm,dynamic-memory, a corresponding
- * entry in linux,drconf-usable-memory property contains
- * a counter 'p' followed by 'p' (base, size) duple.
- * Now read the counter from
- * linux,drconf-usable-memory property
- */
- rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
- if (!rngs) /* there are no (base, size) duple */
+ base = dt_mem_next_cell(dt_root_addr_cells, usm);
+ size = dt_mem_next_cell(dt_root_size_cells, usm);
+ }
+
+ if (iommu_is_off) {
+ if (base >= 0x80000000ul)
continue;
+ if ((base + size) > 0x80000000ul)
+ size = 0x80000000ul - base;
}
- do {
- if (is_kexec_kdump) {
- base = dt_mem_next_cell(dt_root_addr_cells,
- &usm);
- size = dt_mem_next_cell(dt_root_size_cells,
- &usm);
- }
- if (iommu_is_off) {
- if (base >= 0x80000000ul)
- continue;
- if ((base + size) > 0x80000000ul)
- size = 0x80000000ul - base;
- }
- memblock_add(base, size);
- } while (--rngs);
- }
- memblock_dump_all();
- return 0;
+
+ DBG("Adding: %llx -> %llx\n", base, size);
+ memblock_add(base, size);
+ } while (--rngs);
}
-#else
-#define early_init_dt_scan_drconf_memory(node) 0
#endif /* CONFIG_PPC_PSERIES */
static int __init early_init_dt_scan_memory_ppc(unsigned long node,
const char *uname,
int depth, void *data)
{
+#ifdef CONFIG_PPC_PSERIES
if (depth == 1 &&
- strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
- return early_init_dt_scan_drconf_memory(node);
+ strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+ walk_drmem_lmbs_early(node, early_init_drmem_lmb);
+ return 0;
+ }
+#endif
return early_init_dt_scan_memory(node, uname, depth, data);
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 02190e90c7ae..adf044daafd7 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -869,10 +869,12 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.reserved2 = 0,
.reserved3 = 0,
.subprocessors = 1,
+ .byte22 = OV5_FEAT(OV5_DRMEM_V2),
.intarch = 0,
.mmu = 0,
.hash_ext = 0,
.radix_ext = 0,
+ .byte22 = OV5_FEAT(OV5_DRC_INFO),
},
/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f52ad5bb7109..ca72d7391d40 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -35,6 +35,7 @@
#include <linux/context_tracking.h>
#include <linux/uaccess.h>
+#include <linux/pkeys.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
@@ -283,6 +284,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
if (regno == PT_DSCR)
return get_user_dscr(task, data);
+#ifdef CONFIG_PPC64
+ /*
+ * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
+ * no more used as a flag, lets force usr to alway see the softe value as 1
+ * which means interrupts are not soft disabled.
+ */
+ if (regno == PT_SOFTE) {
+ *data = 1;
+ return 0;
+ }
+#endif
+
if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
@@ -1775,6 +1788,61 @@ static int pmu_set(struct task_struct *target,
return ret;
}
#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pkey_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
+ BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.amr, 0,
+ ELF_NPKEY * sizeof(unsigned long));
+}
+
+static int pkey_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 new_amr;
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ /* Only the AMR can be set from userspace */
+ if (pos != 0 || count != sizeof(new_amr))
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &new_amr, 0, sizeof(new_amr));
+ if (ret)
+ return ret;
+
+ /* UAMOR determines which bits of the AMR can be set from userspace. */
+ target->thread.amr = (new_amr & target->thread.uamor) |
+ (target->thread.amr & ~target->thread.uamor);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
/*
* These are our native regset flavors.
*/
@@ -1809,6 +1877,9 @@ enum powerpc_regset {
REGSET_EBB, /* EBB registers */
REGSET_PMR, /* Performance Monitor Registers */
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
};
static const struct user_regset native_regsets[] = {
@@ -1914,6 +1985,13 @@ static const struct user_regset native_regsets[] = {
.active = pmu_active, .get = pmu_get, .set = pmu_set
},
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .get = pkey_get, .set = pkey_set
+ },
+#endif
};
static const struct user_regset_view user_ppc_native_view = {
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index c8c5f3a550c2..fb070d8cad07 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -261,19 +261,19 @@ static int __init proc_rtas_init(void)
if (rtas_node == NULL)
return -ENODEV;
- proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL,
+ proc_create("powerpc/rtas/progress", 0644, NULL,
&ppc_rtas_progress_operations);
- proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL,
+ proc_create("powerpc/rtas/clock", 0644, NULL,
&ppc_rtas_clock_operations);
- proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/poweron", 0644, NULL,
&ppc_rtas_poweron_operations);
- proc_create("powerpc/rtas/sensors", S_IRUGO, NULL,
+ proc_create("powerpc/rtas/sensors", 0444, NULL,
&ppc_rtas_sensors_operations);
- proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/frequency", 0644, NULL,
&ppc_rtas_tone_freq_operations);
- proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/volume", 0644, NULL,
&ppc_rtas_tone_volume_operations);
- proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL,
+ proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
&ppc_rtas_rmo_buf_ops);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index f6f6a8a5103a..10fabae2574d 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -727,7 +727,7 @@ static int __init rtas_flash_init(void)
const struct rtas_flash_file *f = &rtas_flash_files[i];
int token;
- if (!proc_create(f->filename, S_IRUSR | S_IWUSR, NULL, &f->fops))
+ if (!proc_create(f->filename, 0600, NULL, &f->fops))
goto enomem;
/*
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 1da8b7d8c6ca..fc600a8b1e77 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -581,7 +581,7 @@ static int __init rtas_init(void)
if (!rtas_log_buf)
return -ENODEV;
- entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
+ entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
&proc_rtas_log_operations);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 3f33869c6486..d73ec518ef80 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -346,10 +346,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
loops_per_jiffy / (500000/HZ),
(loops_per_jiffy / (5000/HZ)) % 100);
#endif
-
-#ifdef CONFIG_SMP
seq_printf(m, "\n");
-#endif
+
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
show_cpuinfo_summary(m);
@@ -379,10 +377,10 @@ static void c_stop(struct seq_file *m, void *v)
}
const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
};
void __init check_for_initrd(void)
@@ -459,13 +457,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
*/
void __init smp_setup_cpu_maps(void)
{
- struct device_node *dn = NULL;
+ struct device_node *dn;
int cpu = 0;
int nthreads = 1;
DBG("smp_setup_cpu_maps()\n");
- while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
+ for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
int j, len;
@@ -505,6 +503,11 @@ void __init smp_setup_cpu_maps(void)
set_cpu_possible(cpu, true);
cpu++;
}
+
+ if (cpu >= nr_cpu_ids) {
+ of_node_put(dn);
+ break;
+ }
}
/* If no SMT supported, nthreads is forced to 1 */
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 21c18071d9d5..3fc11e30308f 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -51,6 +51,10 @@ void record_spr_defaults(void);
static inline void record_spr_defaults(void) { };
#endif
+#ifdef CONFIG_PPC64
+u64 ppc64_bolted_size(void);
+#endif
+
/*
* Having this in kvm_ppc.h makes include dependencies too
* tricky to solve for setup-common.c so have it here.
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e67413f4a8f0..c388cc3357fa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -10,8 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#define DEBUG
-
#include <linux/export.h>
#include <linux/string.h>
#include <linux/sched.h>
@@ -69,6 +67,7 @@
#include <asm/livepatch.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
+#include <asm/hw_irq.h>
#include "setup.h"
@@ -190,6 +189,8 @@ static void __init fixup_boot_paca(void)
get_paca()->cpu_start = 1;
/* Allow percpu accesses to work until we setup percpu data */
get_paca()->data_offset = 0;
+ /* Mark interrupts disabled in PACA */
+ irq_soft_mask_set(IRQS_DISABLED);
}
static void __init configure_exceptions(void)
@@ -352,7 +353,7 @@ void __init early_setup(unsigned long dt_ptr)
void early_setup_secondary(void)
{
/* Mark interrupts disabled in PACA */
- get_paca()->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
@@ -568,25 +569,31 @@ void __init initialize_cache_info(void)
DBG(" <- initialize_cache_info()\n");
}
-/* This returns the limit below which memory accesses to the linear
- * mapping are guarnateed not to cause a TLB or SLB miss. This is
- * used to allocate interrupt or emergency stacks for which our
- * exception entry path doesn't deal with being interrupted.
+/*
+ * This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause an architectural exception (e.g.,
+ * TLB or SLB miss fault).
+ *
+ * This is used to allocate PACAs and various interrupt stacks that
+ * that are accessed early in interrupt handlers that must not cause
+ * re-entrant interrupts.
*/
-static __init u64 safe_stack_limit(void)
+__init u64 ppc64_bolted_size(void)
{
#ifdef CONFIG_PPC_BOOK3E
/* Freescale BookE bolts the entire linear mapping */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ /* XXX: BookE ppc64_rma_limit setup seems to disagree? */
+ if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
return linear_map_top;
/* Other BookE, we assume the first GB is bolted */
return 1ul << 30;
#else
+ /* BookS radix, does not take faults on linear mapping */
if (early_radix_enabled())
return ULONG_MAX;
- /* BookS, the first segment is bolted */
- if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ /* BookS hash, the first segment is bolted */
+ if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
return 1UL << SID_SHIFT;
#endif
@@ -594,7 +601,7 @@ static __init u64 safe_stack_limit(void)
void __init irqstack_early_init(void)
{
- u64 limit = safe_stack_limit();
+ u64 limit = ppc64_bolted_size();
unsigned int i;
/*
@@ -679,7 +686,7 @@ void __init emergency_stack_init(void)
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
- limit = min(safe_stack_limit(), ppc64_rma_size);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
@@ -857,7 +864,7 @@ static void init_fallback_flush(void)
int cpu;
l1d_size = ppc64_caches.l1d.size;
- limit = min(safe_stack_limit(), ppc64_rma_size);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
/*
* Align to L1d size, and size it at 2x L1d size, to catch possible
@@ -868,19 +875,8 @@ static void init_fallback_flush(void)
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
for_each_possible_cpu(cpu) {
- /*
- * The fallback flush is currently coded for 8-way
- * associativity. Different associativity is possible, but it
- * will be treated as 8-way and may not evict the lines as
- * effectively.
- *
- * 128 byte lines are mandatory.
- */
- u64 c = l1d_size / 8;
-
paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca[cpu].l1d_flush_congruence = c;
- paca[cpu].l1d_flush_sets = c / 128;
+ paca[cpu].l1d_flush_size = l1d_size;
}
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index aded81169648..a46de0035214 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs,
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ elf_greg_t64 softe = 0x1;
WARN_ON(!FULL_REGS(regs));
for (i = 0; i <= PT_RESULT; i ++) {
if (i == 14 && !FULL_REGS(regs))
i = 32;
+ if ( i == PT_SOFTE) {
+ if(__put_user((unsigned int)softe, &frame->mc_gregs[i]))
+ return -EFAULT;
+ else
+ continue;
+ }
if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
return -EFAULT;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4b9ca3570344..720117690822 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
long err = 0;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ unsigned long softe = 0x1;
BUG_ON(tsk != current);
@@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc,
WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+ err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
err |= __put_user(signr, &sc->signal);
err |= __put_user(handler, &sc->handler);
if (set != NULL)
@@ -207,7 +210,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
struct pt_regs *regs = tsk->thread.regs;
- unsigned long msr = tsk->thread.ckpt_regs.msr;
+ unsigned long msr = tsk->thread.regs->msr;
long err = 0;
BUG_ON(tsk != current);
@@ -216,6 +219,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
WARN_ON(tm_suspend_disabled);
+ /* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as
+ * it contains the correct FP, VEC, VSX state after we treclaimed
+ * the transaction and giveup_all() was called on reclaiming.
+ */
+ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e0a4c1f82e25..bbe7634b3a43 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -543,7 +543,25 @@ void smp_send_debugger_break(void)
#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
+ int cpu;
+
smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
+ if (kdump_in_progress() && crash_wake_offline) {
+ for_each_present_cpu(cpu) {
+ if (cpu_online(cpu))
+ continue;
+ /*
+ * crash_ipi_callback will wait for
+ * all cpus, including offline CPUs.
+ * We don't care about nmi_ipi_function.
+ * Offline cpus will jump straight into
+ * crash_ipi_callback, we can skip the
+ * entire NMI dance and waiting for
+ * cpus to clear pending mask, etc.
+ */
+ do_smp_send_nmi_ipi(cpu);
+ }
+ }
}
#endif
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b8d4a1dac39f..5a8bfee6e187 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
SYSFS_SPRSETUP(purr, SPRN_PURR);
SYSFS_SPRSETUP(spurr, SPRN_SPURR);
SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(tscr, SPRN_TSCR);
/*
Lets only enable read for phyp resources and
@@ -495,6 +496,7 @@ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
/*
* This is the system wide DSCR register default value. Any
@@ -785,6 +787,9 @@ static int register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -867,6 +872,9 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fe6f3a285455..a32823dcd9a4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
void accumulate_stolen_time(void)
{
u64 sst, ust;
- u8 save_soft_enabled = local_paca->soft_enabled;
+ unsigned long save_irq_soft_mask = irq_soft_mask_return();
struct cpu_accounting_data *acct = &local_paca->accounting;
/* We are called early in the exception entry, before
@@ -253,7 +253,7 @@ void accumulate_stolen_time(void)
* needs to reflect that so various debug stuff doesn't
* complain
*/
- local_paca->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
sst = scan_dispatch_log(acct->starttime_user);
ust = scan_dispatch_log(acct->starttime);
@@ -261,7 +261,7 @@ void accumulate_stolen_time(void)
acct->utime -= ust;
acct->steal_time += ust + sst;
- local_paca->soft_enabled = save_soft_enabled;
+ irq_soft_mask_set(save_irq_soft_mask);
}
static inline u64 calculate_stolen_time(u64 stop_tb)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c93f1e6a9fff..1e48d157196a 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -20,6 +20,7 @@
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
@@ -38,6 +39,8 @@
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/smp.h>
+#include <linux/console.h>
+#include <linux/kmsg_dump.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -142,6 +145,28 @@ static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;
+extern void panic_flush_kmsg_start(void)
+{
+ /*
+ * These are mostly taken from kernel/panic.c, but tries to do
+ * relatively minimal work. Don't use delay functions (TB may
+ * be broken), don't crash dump (need to set a firmware log),
+ * don't run notifiers. We do want to get some information to
+ * Linux console.
+ */
+ console_verbose();
+ bust_spinlocks(1);
+}
+
+extern void panic_flush_kmsg_end(void)
+{
+ printk_safe_flush_on_panic();
+ kmsg_dump(KMSG_DUMP_PANIC);
+ bust_spinlocks(0);
+ debug_locks_off();
+ console_flush_on_panic();
+}
+
static unsigned long oops_begin(struct pt_regs *regs)
{
int cpu;
@@ -266,7 +291,9 @@ void user_single_step_siginfo(struct task_struct *tsk,
info->si_addr = (void __user *)regs->nip;
}
-void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+
+void _exception_pkey(int signr, struct pt_regs *regs, int code,
+ unsigned long addr, int key)
{
siginfo_t info;
const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \
@@ -289,13 +316,27 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
local_irq_enable();
current->thread.trap_nr = code;
+
+ /*
+ * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need
+ * to capture the content, if the task gets killed.
+ */
+ thread_pkey_regs_save(&current->thread);
+
memset(&info, 0, sizeof(info));
info.si_signo = signr;
info.si_code = code;
info.si_addr = (void __user *) addr;
+ info.si_pkey = key;
+
force_sig_info(signr, &info, current);
}
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+ _exception_pkey(signr, regs, code, addr, 0);
+}
+
void system_reset_exception(struct pt_regs *regs)
{
/*
@@ -337,7 +378,7 @@ void system_reset_exception(struct pt_regs *regs)
* No debugger or crash dump registered, print logs then
* panic.
*/
- __die("System Reset", regs, SIGABRT);
+ die("System Reset", regs, SIGABRT);
mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
@@ -1564,7 +1605,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
u8 status;
bool hv;
- hv = (regs->trap == 0xf80);
+ hv = (TRAP(regs) == 0xf80);
if (hv)
value = mfspr(SPRN_HFSCR);
else
@@ -2113,13 +2154,13 @@ static int __init ppc_warn_emulated_init(void)
if (!dir)
return -ENOMEM;
- d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir,
+ d = debugfs_create_u32("do_warn", 0644, dir,
&ppc_warn_emulated);
if (!d)
goto fail;
for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
- d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir,
+ d = debugfs_create_u32(entries[i].name, 0644, dir,
(u32 *)&entries[i].val.counter);
if (!d)
goto fail;
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 382021324883..c002adcc694c 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -64,6 +64,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+
+ cmpwi cr5,r3,CLOCK_REALTIME_COARSE
+ cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE
+ cror cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+ cror cr0*4+eq,cr0*4+eq,cr5*4+eq
bne cr0,99f
mflr r12 /* r12 saves lr */
@@ -72,6 +78,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
bl V_LOCAL_FUNC(__get_datapage) /* get data page */
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
+ beq cr5,70f
50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
bne cr1,80f /* if not monotonic, all done */
@@ -97,19 +104,57 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
ld r0,CFG_TB_UPDATE_COUNT(r3)
cmpld cr0,r0,r8 /* check if updated */
bne- 50b
+ b 78f
- /* Add wall->monotonic offset and check for overflow or underflow.
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
*/
- add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 1f
- subf r5,r7,r5
- addi r4,r4,1
-1: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
+70: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 70b
+ add r3,r3,r0 /* r0 is already 0 */
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ bne cr6,75f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwa r6,WTOM_CLOCK_SEC(r3)
+ lwa r9,WTOM_CLOCK_NSEC(r3)
+
+ /* check if counter has updated */
+ or r0,r6,r9
+75: or r0,r0,r4
+ or r0,r0,r5
+ xor r0,r0,r0
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 70b
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr6,80f
+
+ /* Add wall->monotonic offset and check for overflow or underflow */
+78: add r4,r4,r6
+ add r5,r5,r9
+ cmpd cr0,r5,r7
+ cmpdi cr1,r5,0
+ blt 79f
+ subf r5,r7,r5
+ addi r4,r4,1
+79: bge cr1,80f
+ addi r4,r4,-1
+ add r5,r5,r7
80: std r4,TSPC64_TV_SEC(r11)
std r5,TSPC64_TV_NSEC(r11)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 74901a87bf7a..c8af90ff49f0 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -273,6 +273,7 @@ SECTIONS
#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
+ *(.data.rel*)
*(.sdata)
*(.sdata2)
*(.got.plt) *(.got)
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 87da80ccced1..6256dc3b0087 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -6,6 +6,9 @@
*
* This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
*/
+
+#define pr_fmt(fmt) "watchdog: " fmt
+
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
@@ -26,15 +29,45 @@
#include <asm/paca.h>
/*
- * The watchdog has a simple timer that runs on each CPU, once per timer
- * period. This is the heartbeat.
+ * The powerpc watchdog ensures that each CPU is able to service timers.
+ * The watchdog sets up a simple timer on each CPU to run once per timer
+ * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
+ * the heartbeat.
+ *
+ * Then there are two systems to check that the heartbeat is still running.
+ * The local soft-NMI, and the SMP checker.
+ *
+ * The soft-NMI checker can detect lockups on the local CPU. When interrupts
+ * are disabled with local_irq_disable(), platforms that use soft-masking
+ * can leave hardware interrupts enabled and handle them with a masked
+ * interrupt handler. The masked handler can send the timer interrupt to the
+ * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
+ * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
+ *
+ * The soft-NMI checker will compare the heartbeat timestamp for this CPU
+ * with the current time, and take action if the difference exceeds the
+ * watchdog threshold.
+ *
+ * The limitation of the soft-NMI watchdog is that it does not work when
+ * interrupts are hard disabled or otherwise not being serviced. This is
+ * solved by also having a SMP watchdog where all CPUs check all other
+ * CPUs heartbeat.
*
- * Then there are checks to see if the heartbeat has not triggered on a CPU
- * for the panic timeout period. Currently the watchdog only supports an
- * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ * The SMP checker can detect lockups on other CPUs. A gobal "pending"
+ * cpumask is kept, containing all CPUs which enable the watchdog. Each
+ * CPU clears their pending bit in their heartbeat timer. When the bitmask
+ * becomes empty, the last CPU to clear its pending bit updates a global
+ * timestamp and refills the pending bitmask.
*
- * This is not an NMI watchdog, but Linux uses that name for a generic
- * watchdog in some cases, so NMI gets used in some places.
+ * In the heartbeat timer, if any CPU notices that the global timestamp has
+ * not been updated for a period exceeding the watchdog threshold, then it
+ * means the CPU(s) with their bit still set in the pending mask have had
+ * their heartbeat stop, and action is taken.
+ *
+ * Some platforms implement true NMI IPIs, which can by used by the SMP
+ * watchdog to detect an unresponsive CPU and pull it out of its stuck
+ * state with the NMI IPI, to get crash/debug data from it. This way the
+ * SMP watchdog can detect hardware interrupts off lockups.
*/
static cpumask_t wd_cpus_enabled __read_mostly;
@@ -47,19 +80,7 @@ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
static DEFINE_PER_CPU(struct timer_list, wd_timer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
-/*
- * These are for the SMP checker. CPUs clear their pending bit in their
- * heartbeat. If the bitmask becomes empty, the time is noted and the
- * bitmask is refilled.
- *
- * All CPUs clear their bit in the pending mask every timer period.
- * Once all have cleared, the time is noted and the bits are reset.
- * If the time since all clear was greater than the panic timeout,
- * we can panic with the list of stuck CPUs.
- *
- * This will work best with NMI IPIs for crash code so the stuck CPUs
- * can be pulled out to get their backtraces.
- */
+/* SMP checker bits */
static unsigned long __wd_smp_lock;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
@@ -90,7 +111,7 @@ static inline void wd_smp_unlock(unsigned long *flags)
static void wd_lockup_ipi(struct pt_regs *regs)
{
- pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+ pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id());
print_modules();
print_irqtrace_events(current);
if (regs)
@@ -131,8 +152,8 @@ static void watchdog_smp_panic(int cpu, u64 tb)
if (cpumask_weight(&wd_smp_cpus_pending) == 0)
goto out;
- pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
- cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+ pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
+ cpu, cpumask_pr_args(&wd_smp_cpus_pending));
if (!sysctl_hardlockup_all_cpu_backtrace) {
/*
@@ -175,7 +196,7 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
unsigned long flags;
- pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+ pr_emerg("CPU %d became unstuck\n", cpu);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
@@ -233,13 +254,10 @@ void soft_nmi_interrupt(struct pt_regs *regs)
}
set_cpu_stuck(cpu, tb);
- pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+ pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip);
print_modules();
print_irqtrace_events(current);
- if (regs)
- show_regs(regs);
- else
- dump_stack();
+ show_regs(regs);
wd_smp_unlock(&flags);
@@ -388,30 +406,8 @@ int __init watchdog_nmi_probe(void)
"powerpc/watchdog:online",
start_wd_on_cpu, stop_wd_on_cpu);
if (err < 0) {
- pr_warn("Watchdog could not be initialized");
+ pr_warn("could not be initialized");
return err;
}
return 0;
}
-
-static void handle_backtrace_ipi(struct pt_regs *regs)
-{
- nmi_cpu_backtrace(regs);
-}
-
-static void raise_backtrace_ipi(cpumask_t *mask)
-{
- unsigned int cpu;
-
- for_each_cpu(cpu, mask) {
- if (cpu == smp_processor_id())
- handle_backtrace_ipi(NULL);
- else
- smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
- }
-}
-
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
-{
- nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
-}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2d46037ce936..e4f70c33fbc7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -93,10 +93,10 @@
static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
static int dynamic_mt_modes = 6;
-module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+module_param(dynamic_mt_modes, int, 0644);
MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
static int target_smt_mode;
-module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
static bool indep_threads_mode = true;
@@ -109,12 +109,10 @@ static struct kernel_param_ops module_param_ops = {
.get = param_get_int,
};
-module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
- S_IRUGO | S_IWUSR);
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
-module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
- S_IRUGO | S_IWUSR);
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index c356f9a40b24..b11043b23c18 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -87,8 +87,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
}
if (dsisr & DSISR_MC_TLB_MULTI) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
+ tlbiel_all_lpid(vcpu->kvm->arch.radix);
dsisr &= ~DSISR_MC_TLB_MULTI;
}
/* Any other errors we don't understand? */
@@ -105,8 +104,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
reload_slb(vcpu);
break;
case SRR1_MC_IFETCH_TLBMULTI:
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
+ tlbiel_all_lpid(vcpu->kvm->arch.radix);
break;
default:
handled = 0;
@@ -268,17 +266,19 @@ static void kvmppc_tb_resync_done(void)
* secondary threads to proceed.
* - All secondary threads will eventually call opal hmi handler on
* their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
*/
long kvmppc_realmode_hmi_handler(void)
{
- int ptid = local_paca->kvm_hstate.ptid;
bool resync_req;
- /* This is only called on primary thread. */
- BUG_ON(ptid != 0);
__this_cpu_inc(irq_stat.hmi_exceptions);
+ if (hmi_handle_debugtrig(NULL) >= 0)
+ return 1;
+
/*
* By now primary thread has already completed guest->host
* partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 26c11f678fbf..8888e625a999 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x)
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
-static int global_invalidates(struct kvm *kvm, unsigned long flags)
+static int global_invalidates(struct kvm *kvm)
{
int global;
int cpu;
@@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
rb = compute_tlbie_rb(v, pte_r, pte_index);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/*
* The reference (R) and change (C) bits in a HPT
* entry can be set by hardware at any time up until
@@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
if (kvm_is_radix(kvm))
return H_FUNCTION;
- global = global_invalidates(kvm, 0);
+ global = global_invalidates(kvm);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0;
for (; i < 4; ++i) {
@@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
HPTE_V_ABSENT);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
- true);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/* Don't lose R/C bit updates done by hardware */
r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
hpte[1] = cpu_to_be64(r);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9c61f736c75b..7886b313d135 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1908,16 +1908,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
bne 27f
bl kvmppc_realmode_hmi_handler
nop
+ cmpdi r3, 0
li r12, BOOK3S_INTERRUPT_HMI
/*
- * At this point kvmppc_realmode_hmi_handler would have resync-ed
- * the TB. Hence it is not required to subtract guest timebase
- * offset from timebase. So, skip it.
+ * At this point kvmppc_realmode_hmi_handler may have resync-ed
+ * the TB, and if it has, we must not subtract the guest timebase
+ * offset from the timebase. So, skip it.
*
* Also, do not call kvmppc_subcore_exit_guest() because it has
* been invoked as part of kvmppc_realmode_hmi_handler().
*/
- b 30f
+ beq 30f
27:
/* Subtract timebase offset from timebase */
@@ -3248,7 +3249,7 @@ kvmppc_bad_host_intr:
mfctr r4
#endif
mfxer r5
- lbz r6, PACASOFTIRQEN(r13)
+ lbz r6, PACAIRQSOFTMASK(r13)
std r3, _LINK(r1)
std r4, _CTR(r1)
std r5, _XER(r1)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index d329b2add7e2..b8356cdc0c04 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1039,7 +1039,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
return;
}
- xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
xics, &xics_debug_fops);
pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d469224c4ada..e0d881ab304e 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -23,19 +23,26 @@
#include <asm/code-patching.h>
#include <asm/setup.h>
-static int __patch_instruction(unsigned int *addr, unsigned int instr)
+static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
+ unsigned int *patch_addr)
{
int err;
- __put_user_size(instr, addr, 4, err);
+ __put_user_size(instr, patch_addr, 4, err);
if (err)
return err;
- asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
+ asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
+ "r" (exec_addr));
return 0;
}
+int raw_patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ return __patch_instruction(addr, instr, addr);
+}
+
#ifdef CONFIG_STRICT_KERNEL_RWX
static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
@@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr)
int patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
- unsigned int *dest = NULL;
+ unsigned int *patch_addr = NULL;
unsigned long flags;
unsigned long text_poke_addr;
unsigned long kaddr = (unsigned long)addr;
@@ -148,8 +155,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
* when text_poke_area is not ready, but we still need
* to allow patching. We just do the plain old patching
*/
- if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
- return __patch_instruction(addr, instr);
+ if (!this_cpu_read(text_poke_area))
+ return raw_patch_instruction(addr, instr);
local_irq_save(flags);
@@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
goto out;
}
- dest = (unsigned int *)(text_poke_addr) +
+ patch_addr = (unsigned int *)(text_poke_addr) +
((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
- /*
- * We use __put_user_size so that we can handle faults while
- * writing to dest and return err to handle faults gracefully
- */
- __put_user_size(instr, dest, 4, err);
- if (!err)
- asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync"
- ::"r" (dest), "r"(addr));
+ __patch_instruction(addr, instr, patch_addr);
err = unmap_patch_area(text_poke_addr);
if (err)
@@ -184,7 +184,7 @@ out:
int patch_instruction(unsigned int *addr, unsigned int instr)
{
- return __patch_instruction(addr, instr);
+ return raw_patch_instruction(addr, instr);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
@@ -302,6 +302,11 @@ int instr_is_relative_branch(unsigned int instr)
return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}
+int instr_is_relative_link_branch(unsigned int instr)
+{
+ return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK);
+}
+
static unsigned long branch_iform_target(const unsigned int *instr)
{
signed long imm;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index a95ea007d654..73697c4e3468 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -62,7 +62,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
}
}
- patch_instruction(dest, instr);
+ raw_patch_instruction(dest, instr);
return 0;
}
@@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
}
for (; dest < end; dest++)
- patch_instruction(dest, PPC_INST_NOP);
+ raw_patch_instruction(dest, PPC_INST_NOP);
return 0;
}
@@ -170,7 +170,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
for (; start < end; start++) {
dest = (void *)start + *start;
- patch_instruction(dest, PPC_INST_LWSYNC);
+ raw_patch_instruction(dest, PPC_INST_LWSYNC);
}
}
@@ -188,7 +188,7 @@ static void do_final_fixups(void)
length = (__end_interrupts - _stext) / sizeof(int);
while (length--) {
- patch_instruction(dest, *src);
+ raw_patch_instruction(dest, *src);
src++;
dest++;
}
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index f29212e40f40..849f50cd62f2 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -67,7 +67,7 @@ void __init MMU_init_hw(void)
/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
#ifdef CONFIG_PIN_TLB_DATA
unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
- unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
+ unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY;
#ifdef CONFIG_PIN_TLB_IMMR
int i = 29;
#else
@@ -79,7 +79,7 @@ void __init MMU_init_hw(void)
for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
mtspr(SPRN_MD_CTR, ctr | (i << 8));
mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
- mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2);
mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
addr += LARGE_PAGE_SIZE_8M;
mem -= LARGE_PAGE_SIZE_8M;
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 76a6b057d454..f06f3577d8d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -9,7 +9,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
- init-common.o mmu_context.o
+ init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
@@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
+obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
new file mode 100644
index 000000000000..1604110c4238
--- /dev/null
+++ b/arch/powerpc/mm/drmem.c
@@ -0,0 +1,439 @@
+/*
+ * Dynamic reconfiguration memory support
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "drmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <asm/prom.h>
+#include <asm/drmem.h>
+
+static struct drmem_lmb_info __drmem_info;
+struct drmem_lmb_info *drmem_info = &__drmem_info;
+
+u64 drmem_lmb_memory_max(void)
+{
+ struct drmem_lmb *last_lmb;
+
+ last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+ return last_lmb->base_addr + drmem_lmb_size();
+}
+
+static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
+{
+ /*
+ * Return the value of the lmb flags field minus the reserved
+ * bit used internally for hotplug processing.
+ */
+ return lmb->flags & ~DRMEM_LMB_RESERVED;
+}
+
+static struct property *clone_property(struct property *prop, u32 prop_sz)
+{
+ struct property *new_prop;
+
+ new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return NULL;
+
+ new_prop->name = kstrdup(prop->name, GFP_KERNEL);
+ new_prop->value = kzalloc(prop_sz, GFP_KERNEL);
+ if (!new_prop->name || !new_prop->value) {
+ kfree(new_prop->name);
+ kfree(new_prop->value);
+ kfree(new_prop);
+ return NULL;
+ }
+
+ new_prop->length = prop_sz;
+#if defined(CONFIG_OF_DYNAMIC)
+ of_property_set_flag(new_prop, OF_DYNAMIC);
+#endif
+ return new_prop;
+}
+
+static int drmem_update_dt_v1(struct device_node *memory,
+ struct property *prop)
+{
+ struct property *new_prop;
+ struct of_drconf_cell_v1 *dr_cell;
+ struct drmem_lmb *lmb;
+ u32 *p;
+
+ new_prop = clone_property(prop, prop->length);
+ if (!new_prop)
+ return -1;
+
+ p = new_prop->value;
+ *p++ = cpu_to_be32(drmem_info->n_lmbs);
+
+ dr_cell = (struct of_drconf_cell_v1 *)p;
+
+ for_each_drmem_lmb(lmb) {
+ dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+ dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+ dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+ dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+
+ dr_cell++;
+ }
+
+ of_update_property(memory, new_prop);
+ return 0;
+}
+
+static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+ struct drmem_lmb *lmb)
+{
+ dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+ dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+ dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+ dr_cell->flags = cpu_to_be32(lmb->flags);
+}
+
+static int drmem_update_dt_v2(struct device_node *memory,
+ struct property *prop)
+{
+ struct property *new_prop;
+ struct of_drconf_cell_v2 *dr_cell;
+ struct drmem_lmb *lmb, *prev_lmb;
+ u32 lmb_sets, prop_sz, seq_lmbs;
+ u32 *p;
+
+ /* First pass, determine how many LMB sets are needed. */
+ lmb_sets = 0;
+ prev_lmb = NULL;
+ for_each_drmem_lmb(lmb) {
+ if (!prev_lmb) {
+ prev_lmb = lmb;
+ lmb_sets++;
+ continue;
+ }
+
+ if (prev_lmb->aa_index != lmb->aa_index ||
+ prev_lmb->flags != lmb->flags)
+ lmb_sets++;
+
+ prev_lmb = lmb;
+ }
+
+ prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
+ new_prop = clone_property(prop, prop_sz);
+ if (!new_prop)
+ return -1;
+
+ p = new_prop->value;
+ *p++ = cpu_to_be32(lmb_sets);
+
+ dr_cell = (struct of_drconf_cell_v2 *)p;
+
+ /* Second pass, populate the LMB set data */
+ prev_lmb = NULL;
+ seq_lmbs = 0;
+ for_each_drmem_lmb(lmb) {
+ if (prev_lmb == NULL) {
+ /* Start of first LMB set */
+ prev_lmb = lmb;
+ init_drconf_v2_cell(dr_cell, lmb);
+ seq_lmbs++;
+ continue;
+ }
+
+ if (prev_lmb->aa_index != lmb->aa_index ||
+ prev_lmb->flags != lmb->flags) {
+ /* end of one set, start of another */
+ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+ dr_cell++;
+
+ init_drconf_v2_cell(dr_cell, lmb);
+ seq_lmbs = 1;
+ } else {
+ seq_lmbs++;
+ }
+
+ prev_lmb = lmb;
+ }
+
+ /* close out last LMB set */
+ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+ of_update_property(memory, new_prop);
+ return 0;
+}
+
+int drmem_update_dt(void)
+{
+ struct device_node *memory;
+ struct property *prop;
+ int rc = -1;
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!memory)
+ return -1;
+
+ prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ rc = drmem_update_dt_v1(memory, prop);
+ } else {
+ prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ rc = drmem_update_dt_v2(memory, prop);
+ }
+
+ of_node_put(memory);
+ return rc;
+}
+
+static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+ const __be32 **prop)
+{
+ const __be32 *p = *prop;
+
+ lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+ lmb->drc_index = of_read_number(p++, 1);
+
+ p++; /* skip reserved field */
+
+ lmb->aa_index = of_read_number(p++, 1);
+ lmb->flags = of_read_number(p++, 1);
+
+ *prop = p;
+}
+
+static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ struct drmem_lmb lmb;
+ u32 i, n_lmbs;
+
+ n_lmbs = of_read_number(prop++, 1);
+
+ for (i = 0; i < n_lmbs; i++) {
+ read_drconf_v1_cell(&lmb, &prop);
+ func(&lmb, &usm);
+ }
+}
+
+static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+ const __be32 **prop)
+{
+ const __be32 *p = *prop;
+
+ dr_cell->seq_lmbs = of_read_number(p++, 1);
+ dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+ dr_cell->drc_index = of_read_number(p++, 1);
+ dr_cell->aa_index = of_read_number(p++, 1);
+ dr_cell->flags = of_read_number(p++, 1);
+
+ *prop = p;
+}
+
+static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ struct of_drconf_cell_v2 dr_cell;
+ struct drmem_lmb lmb;
+ u32 i, j, lmb_sets;
+
+ lmb_sets = of_read_number(prop++, 1);
+
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &prop);
+
+ for (j = 0; j < dr_cell.seq_lmbs; j++) {
+ lmb.base_addr = dr_cell.base_addr;
+ dr_cell.base_addr += drmem_lmb_size();
+
+ lmb.drc_index = dr_cell.drc_index;
+ dr_cell.drc_index++;
+
+ lmb.aa_index = dr_cell.aa_index;
+ lmb.flags = dr_cell.flags;
+
+ func(&lmb, &usm);
+ }
+ }
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void __init walk_drmem_lmbs_early(unsigned long node,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ const __be32 *prop, *usm;
+ int len;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+ if (!prop || len < dt_root_size_cells * sizeof(__be32))
+ return;
+
+ drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+ usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
+
+ prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
+ if (prop) {
+ __walk_drmem_v1_lmbs(prop, usm, func);
+ } else {
+ prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
+ &len);
+ if (prop)
+ __walk_drmem_v2_lmbs(prop, usm, func);
+ }
+
+ memblock_dump_all();
+}
+
+#endif
+
+static int __init init_drmem_lmb_size(struct device_node *dn)
+{
+ const __be32 *prop;
+ int len;
+
+ if (drmem_info->lmb_size)
+ return 0;
+
+ prop = of_get_property(dn, "ibm,lmb-size", &len);
+ if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
+ pr_info("Could not determine LMB size\n");
+ return -1;
+ }
+
+ drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+ return 0;
+}
+
+/*
+ * Returns the property linux,drconf-usable-memory if
+ * it exists (the property exists only in kexec/kdump kernels,
+ * added by kexec-tools)
+ */
+static const __be32 *of_get_usable_memory(struct device_node *dn)
+{
+ const __be32 *prop;
+ u32 len;
+
+ prop = of_get_property(dn, "linux,drconf-usable-memory", &len);
+ if (!prop || len < sizeof(unsigned int))
+ return NULL;
+
+ return prop;
+}
+
+void __init walk_drmem_lmbs(struct device_node *dn,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ const __be32 *prop, *usm;
+
+ if (init_drmem_lmb_size(dn))
+ return;
+
+ usm = of_get_usable_memory(dn);
+
+ prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ __walk_drmem_v1_lmbs(prop, usm, func);
+ } else {
+ prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ __walk_drmem_v2_lmbs(prop, usm, func);
+ }
+}
+
+static void __init init_drmem_v1_lmbs(const __be32 *prop)
+{
+ struct drmem_lmb *lmb;
+
+ drmem_info->n_lmbs = of_read_number(prop++, 1);
+
+ drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+ GFP_KERNEL);
+ if (!drmem_info->lmbs)
+ return;
+
+ for_each_drmem_lmb(lmb)
+ read_drconf_v1_cell(lmb, &prop);
+}
+
+static void __init init_drmem_v2_lmbs(const __be32 *prop)
+{
+ struct drmem_lmb *lmb;
+ struct of_drconf_cell_v2 dr_cell;
+ const __be32 *p;
+ u32 i, j, lmb_sets;
+ int lmb_index;
+
+ lmb_sets = of_read_number(prop++, 1);
+
+ /* first pass, calculate the number of LMBs */
+ p = prop;
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &p);
+ drmem_info->n_lmbs += dr_cell.seq_lmbs;
+ }
+
+ drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+ GFP_KERNEL);
+ if (!drmem_info->lmbs)
+ return;
+
+ /* second pass, read in the LMB information */
+ lmb_index = 0;
+ p = prop;
+
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &p);
+
+ for (j = 0; j < dr_cell.seq_lmbs; j++) {
+ lmb = &drmem_info->lmbs[lmb_index++];
+
+ lmb->base_addr = dr_cell.base_addr;
+ dr_cell.base_addr += drmem_info->lmb_size;
+
+ lmb->drc_index = dr_cell.drc_index;
+ dr_cell.drc_index++;
+
+ lmb->aa_index = dr_cell.aa_index;
+ lmb->flags = dr_cell.flags;
+ }
+ }
+}
+
+static int __init drmem_init(void)
+{
+ struct device_node *dn;
+ const __be32 *prop;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn) {
+ pr_info("No dynamic reconfiguration memory found\n");
+ return 0;
+ }
+
+ if (init_drmem_lmb_size(dn)) {
+ of_node_put(dn);
+ return 0;
+ }
+
+ prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ init_drmem_v1_lmbs(prop);
+ } else {
+ prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ init_drmem_v2_lmbs(prop);
+ }
+
+ of_node_put(dn);
+ return 0;
+}
+late_initcall(drmem_init);
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index c2e7dea59490..876e2a3c79f2 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -112,26 +112,25 @@ struct flag_info {
static const struct flag_info flag_array[] = {
{
-#ifdef CONFIG_PPC_BOOK3S_64
- .mask = _PAGE_PRIVILEGED,
- .val = 0,
-#else
- .mask = _PAGE_USER,
+ .mask = _PAGE_USER | _PAGE_PRIVILEGED,
.val = _PAGE_USER,
-#endif
.set = "user",
.clear = " ",
}, {
-#if _PAGE_RO == 0
- .mask = _PAGE_RW,
+ .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
.val = _PAGE_RW,
-#else
- .mask = _PAGE_RO,
- .val = 0,
-#endif
.set = "rw",
- .clear = "ro",
}, {
+ .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_RO,
+ .set = "ro",
+ }, {
+#if _PAGE_NA != 0
+ .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_RO,
+ .set = "na",
+ }, {
+#endif
.mask = _PAGE_EXEC,
.val = _PAGE_EXEC,
.set = " X ",
@@ -213,7 +212,7 @@ static const struct flag_info flag_array[] = {
.val = H_PAGE_4K_PFN,
.set = "4K_pfn",
}, {
-#endif
+#else /* CONFIG_PPC_64K_PAGES */
.mask = H_PAGE_F_GIX,
.val = H_PAGE_F_GIX,
.set = "f_gix",
@@ -224,14 +223,11 @@ static const struct flag_info flag_array[] = {
.val = H_PAGE_F_SECOND,
.set = "f_second",
}, {
+#endif /* CONFIG_PPC_64K_PAGES */
#endif
.mask = _PAGE_SPECIAL,
.val = _PAGE_SPECIAL,
.set = "special",
- }, {
- .mask = _PAGE_SHARED,
- .val = _PAGE_SHARED,
- .set = "shared",
}
};
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6e1e39035380..866446cf2d9a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs)
*/
static int
-__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
+ int pkey)
{
/*
* If we are in kernel mode, bail out with a SEGV, this will
@@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
if (!user_mode(regs))
return SIGSEGV;
- _exception(SIGSEGV, regs, si_code, address);
+ _exception_pkey(SIGSEGV, regs, si_code, address, pkey);
return 0;
}
static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
- return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+ return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0);
}
-static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
+ int pkey)
{
struct mm_struct *mm = current->mm;
@@ -137,17 +139,23 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
*/
up_read(&mm->mmap_sem);
- return __bad_area_nosemaphore(regs, address, si_code);
+ return __bad_area_nosemaphore(regs, address, si_code, pkey);
}
static noinline int bad_area(struct pt_regs *regs, unsigned long address)
{
- return __bad_area(regs, address, SEGV_MAPERR);
+ return __bad_area(regs, address, SEGV_MAPERR, 0);
+}
+
+static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
+ int pkey)
+{
+ return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey);
}
static noinline int bad_access(struct pt_regs *regs, unsigned long address)
{
- return __bad_area(regs, address, SEGV_ACCERR);
+ return __bad_area(regs, address, SEGV_ACCERR, 0);
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
@@ -432,6 +440,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ if (error_code & DSISR_KEYFAULT)
+ return bad_key_fault_exception(regs, address,
+ get_mm_addr_key(mm, address));
+
/*
* We want to do this outside mmap_sem, because reading code around nip
* can result in fault, which will cause a deadlock when called with
@@ -503,6 +515,31 @@ good_area:
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * if the HPTE is not hashed, hardware will not detect
+ * a key fault. Lets check if we failed because of a
+ * software detected key fault.
+ */
+ if (unlikely(fault & VM_FAULT_SIGSEGV) &&
+ !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+ is_exec, 0)) {
+ /*
+ * The PGD-PDT...PMD-PTE tree may not have been fully setup.
+ * Hence we cannot walk the tree to locate the PTE, to locate
+ * the key. Hence let's use vma_pkey() to get the key; instead
+ * of get_mm_addr_key().
+ */
+ int pkey = vma_pkey(vma);
+
+ if (likely(pkey)) {
+ up_read(&mm->mmap_sem);
+ return bad_key_fault_exception(regs, address, pkey);
+ }
+ }
+#endif /* CONFIG_PPC_MEM_KEYS */
+
major |= fault & VM_FAULT_MAJOR;
/*
@@ -576,7 +613,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
/* kernel has accessed a bad area */
- switch (regs->trap) {
+ switch (TRAP(regs)) {
case 0x300:
case 0x380:
printk(KERN_ALERT "Unable to handle kernel paging request for "
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 6fa450c12d6d..5a69b51d08a3 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, int subpg_prot)
{
+ real_pte_t rpte;
unsigned long hpte_group;
unsigned long rflags, pa;
unsigned long old_pte, new_pte;
@@ -54,6 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
+ rpte = __real_pte(__pte(old_pte), ptep);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -64,13 +66,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
/*
* There MIGHT be an HPTE for this pte
*/
- hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & H_PAGE_F_SECOND)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
+ unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize,
+ rpte, 0);
- if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K,
MMU_PAGE_4K, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -118,8 +117,7 @@ repeat:
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
- (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 1a68cb19b0e3..2253bbc6a599 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -15,34 +15,22 @@
#include <linux/mm.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
+
/*
- * index from 0 - 15
+ * Return true, if the entry has a slot value which
+ * the software considers as invalid.
*/
-bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
+static inline bool hpte_soft_invalid(unsigned long hidx)
{
- unsigned long g_idx;
- unsigned long ptev = pte_val(rpte.pte);
-
- g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT;
- index = index >> 2;
- if (g_idx & (0x1 << index))
- return true;
- else
- return false;
+ return ((hidx & 0xfUL) == 0xfUL);
}
+
/*
* index from 0 - 15
*/
-static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index)
+bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
{
- unsigned long g_idx;
-
- if (!(ptev & H_PAGE_COMBO))
- return ptev;
- index = index >> 2;
- g_idx = 0x1 << index;
-
- return ptev | (g_idx << H_PAGE_F_GIX_SHIFT);
+ return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index)));
}
int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
@@ -50,12 +38,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
int ssize, int subpg_prot)
{
real_pte_t rpte;
- unsigned long *hidxp;
unsigned long hpte_group;
unsigned int subpg_index;
- unsigned long rflags, pa, hidx;
+ unsigned long rflags, pa;
unsigned long old_pte, new_pte, subpg_pte;
- unsigned long vpn, hash, slot;
+ unsigned long vpn, hash, slot, gslot;
unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
/*
@@ -116,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* On hash insert failure we use old pte value and we don't
* want slot information there if we have a insert failure.
*/
- old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
- new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
+ old_pte &= ~H_PAGE_HASHPTE;
+ new_pte &= ~H_PAGE_HASHPTE;
goto htab_insert_hpte;
}
/*
@@ -126,18 +113,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
if (__rpte_sub_valid(rpte, subpg_index)) {
int ret;
- hash = hpt_hash(vpn, shift, ssize);
- hidx = __rpte_to_hidx(rpte, subpg_index);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
-
- ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
+ subpg_index);
+ ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
MMU_PAGE_4K, MMU_PAGE_4K,
ssize, flags);
+
/*
- *if we failed because typically the HPTE wasn't really here
+ * If we failed because typically the HPTE wasn't really here
* we try an insertion.
*/
if (ret == -1)
@@ -148,6 +131,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
}
htab_insert_hpte:
+
+ /*
+ * Initialize all hidx entries to invalid value, the first time
+ * the PTE is about to allocate a 4K HPTE.
+ */
+ if (!(old_pte & H_PAGE_COMBO))
+ rpte.hidx = INVALID_RPTE_HIDX;
+
/*
* handle H_PAGE_4K_PFN case
*/
@@ -172,15 +163,39 @@ repeat:
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
+ bool soft_invalid;
+
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags, HPTE_V_SECONDARY,
MMU_PAGE_4K, MMU_PAGE_4K,
ssize);
- if (slot == -1) {
- if (mftb() & 0x1)
+
+ soft_invalid = hpte_soft_invalid(slot);
+ if (unlikely(soft_invalid)) {
+ /*
+ * We got a valid slot from a hardware point of view.
+ * but we cannot use it, because we use this special
+ * value; as defined by hpte_soft_invalid(), to track
+ * invalid slots. We cannot use it. So invalidate it.
+ */
+ gslot = slot & _PTEIDX_GROUP_IX;
+ mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize, 0);
+ }
+
+ if (unlikely(slot == -1 || soft_invalid)) {
+ /*
+ * For soft invalid slot, let's ensure that we release a
+ * slot from the primary, with the hope that we will
+ * acquire that slot next time we try. This will ensure
+ * that we do not get the same soft-invalid slot.
+ */
+ if (soft_invalid || (mftb() & 0x1))
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
+
mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
@@ -198,21 +213,10 @@ repeat:
MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
return -1;
}
- /*
- * Insert slot number & secondary bit in PTE second half,
- * clear H_PAGE_BUSY and set appropriate HPTE slot bit
- * Since we have H_PAGE_BUSY set on ptep, we can be sure
- * nobody is undating hidx.
- */
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
- rpte.hidx &= ~(0xfUL << (subpg_index << 2));
- *hidxp = rpte.hidx | (slot << (subpg_index << 2));
- new_pte = mark_subptegroup_valid(new_pte, subpg_index);
- new_pte |= H_PAGE_HASHPTE;
- /*
- * check __real_pte for details on matching smp_rmb()
- */
- smp_wmb();
+
+ new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+ new_pte |= H_PAGE_HASHPTE;
+
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
@@ -221,6 +225,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
unsigned long vsid, pte_t *ptep, unsigned long trap,
unsigned long flags, int ssize)
{
+ real_pte_t rpte;
unsigned long hpte_group;
unsigned long rflags, pa;
unsigned long old_pte, new_pte;
@@ -257,6 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
+ rpte = __real_pte(__pte(old_pte), ptep);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -264,16 +270,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
vpn = hpt_vpn(ea, vsid, ssize);
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+ unsigned long gslot;
+
/*
* There MIGHT be an HPTE for this pte
*/
- hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & H_PAGE_F_SECOND)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
-
- if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K,
MMU_PAGE_64K, ssize,
flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
@@ -322,9 +325,9 @@ repeat:
MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
return -1;
}
+
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
- (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 640cf566e986..a0675e91ad7d 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -47,6 +47,103 @@
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+ unsigned long rb;
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+ asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+ unsigned int r = 0; /* hash format */
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+ : "memory");
+}
+
+
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa206(set, is);
+
+ asm volatile("ptesync": : :"memory");
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the first set of the TLB, and any caching of partition table
+ * entries. Then flush the remaining sets of the TLB. Hash mode uses
+ * partition scoped TLB translations.
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+ /*
+ * Now invalidate the process table cache.
+ *
+ * From ISA v3.0B p. 1078:
+ * The following forms are invalid.
+ * * PRS=1, R=0, and RIC!=2 (The only process-scoped
+ * HPT caching is of the Process Table.)
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+ asm volatile("ptesync": : :"memory");
+}
+
+void hash__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ tlbiel_all_isa206(POWER8_TLB_SETS, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
+ tlbiel_all_isa206(POWER7_TLB_SETS, is);
+ else
+ WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+
+ asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 655a5a9a183d..7d07c7e17db6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -36,6 +36,7 @@
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
+#include <linux/pkeys.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -232,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
*/
rflags |= HPTE_R_M;
+ rflags |= pte_to_hpte_pkey_bits(pteflags);
return rflags;
}
@@ -606,7 +608,7 @@ static void init_hpte_page_sizes(void)
continue; /* not a supported page size */
for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
penc = mmu_psize_defs[bp].penc[ap];
- if (penc == -1)
+ if (penc == -1 || !mmu_psize_defs[ap].shift)
continue;
shift = mmu_psize_defs[ap].shift - LP_SHIFT;
if (shift <= 0)
@@ -772,7 +774,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
int rc;
rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
- if (rc)
+ if (rc && (rc != -ENODEV))
printk(KERN_WARNING
"Unable to resize hash page table to target order %d: %d\n",
target_hpt_shift, rc);
@@ -979,8 +981,9 @@ void __init hash__early_init_devtree(void)
void __init hash__early_init_mmu(void)
{
+#ifndef CONFIG_PPC_64K_PAGES
/*
- * We have code in __hash_page_64K() and elsewhere, which assumes it can
+ * We have code in __hash_page_4K() and elsewhere, which assumes it can
* do the following:
* new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
*
@@ -991,6 +994,7 @@ void __init hash__early_init_mmu(void)
* with a BUILD_BUG_ON().
*/
BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3)));
+#endif /* CONFIG_PPC_64K_PAGES */
htab_init_page_sizes();
@@ -1049,6 +1053,10 @@ void __init hash__early_init_mmu(void)
pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206)
+ && cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
#ifdef CONFIG_SMP
@@ -1068,6 +1076,10 @@ void hash__early_init_mmu_secondary(void)
}
/* Initialize SLB */
slb_initialize();
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206)
+ && cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
#endif /* CONFIG_SMP */
@@ -1569,6 +1581,30 @@ out_exit:
local_irq_restore(flags);
}
+#ifdef CONFIG_PPC_MEM_KEYS
+/*
+ * Return the protection key associated with the given address and the
+ * mm_struct.
+ */
+u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+ pte_t *ptep;
+ u16 pkey = 0;
+ unsigned long flags;
+
+ if (!mm || !mm->pgd)
+ return 0;
+
+ local_irq_save(flags);
+ ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
+ if (ptep)
+ pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
+ local_irq_restore(flags);
+
+ return pkey;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline void tm_flush_hash_page(int local)
{
@@ -1592,29 +1628,42 @@ static inline void tm_flush_hash_page(int local)
}
#endif
+/*
+ * Return the global hash slot, corresponding to the given PTE, which contains
+ * the HPTE.
+ */
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+ int ssize, real_pte_t rpte, unsigned int subpg_index)
+{
+ unsigned long hash, gslot, hidx;
+
+ hash = hpt_hash(vpn, shift, ssize);
+ hidx = __rpte_to_hidx(rpte, subpg_index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ gslot += hidx & _PTEIDX_GROUP_IX;
+ return gslot;
+}
+
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
unsigned long flags)
{
- unsigned long hash, index, shift, hidx, slot;
+ unsigned long index, shift, gslot;
int local = flags & HPTE_LOCAL_UPDATE;
DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
- hash = hpt_hash(vpn, shift, ssize);
- hidx = __rpte_to_hidx(pte, index);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
+ DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
/*
* We use same base page size and actual psize, because we don't
* use these functions for hugepage
*/
- mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
+ mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
ssize, local);
} pte_iterate_hashed_end();
@@ -1825,16 +1874,24 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
BUG_ON(first_memblock_base != 0);
- /* On LPAR systems, the first entry is our RMA region,
- * non-LPAR 64-bit hash MMU systems don't have a limitation
- * on real mode access, but using the first entry works well
- * enough. We also clamp it to 1G to avoid some funky things
- * such as RTAS bugs etc...
+ /*
+ * On virtualized systems the first entry is our RMA region aka VRMA,
+ * non-virtualized 64-bit hash MMU systems don't have a limitation
+ * on real mode access.
+ *
+ * For guests on platforms before POWER9, we clamp the it limit to 1G
+ * to avoid some funky things such as RTAS bugs etc...
*/
- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+ if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ ppc64_rma_size = first_memblock_size;
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
+ ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
- /* Finally limit subsequent allocations */
- memblock_set_current_limit(ppc64_rma_size);
+ /* Finally limit subsequent allocations */
+ memblock_set_current_limit(ppc64_rma_size);
+ } else {
+ ppc64_rma_size = ULONG_MAX;
+ }
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 0c2a91df3210..12511f5a015f 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -23,6 +23,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, unsigned int shift, unsigned int mmu_psize)
{
+ real_pte_t rpte;
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
@@ -62,6 +63,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
+ rpte = __real_pte(__pte(old_pte), ptep);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -72,15 +74,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* Check if pte already has an hpte (case 2) */
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
- unsigned long hash, slot;
+ unsigned long gslot;
- hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & H_PAGE_F_SECOND)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
-
- if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
mmu_psize, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -107,8 +104,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
- new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
- (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
}
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a9b9083c5e49..876da2bc1796 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -96,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
*hpdp = __hugepd(__pa(new) |
(shift_to_mmu_psize(pshift) << 2));
#elif defined(CONFIG_PPC_8xx)
- *hpdp = __hugepd(__pa(new) |
+ *hpdp = __hugepd(__pa(new) | _PMD_USER |
(pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
_PMD_PAGE_512K) | _PMD_PRESENT);
#else
@@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page)
* So long as we atomically load page table pointers we are safe against teardown,
* we can follow the address down to the the page and take a ref on it.
* This function need to be called with interrupts disabled. We use this variant
- * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
+ * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
*/
pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hpage_shift)
@@ -855,9 +855,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
pte = READ_ONCE(*ptep);
- if (!pte_present(pte) || !pte_read(pte))
- return 0;
- if (write && !pte_write(pte))
+ if (!pte_access_permitted(pte, write))
return 0;
/* hugepages are never "special" */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a07722531b32..f6eb7e8f4c93 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -214,9 +214,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
rc = vmemmap_create_mapping(start, page_size, __pa(p));
if (rc < 0) {
- pr_warning(
- "vmemmap_populate: Unable to create vmemmap mapping: %d\n",
- rc);
+ pr_warn("%s: Unable to create vmemmap mapping: %d\n",
+ __func__, rc);
return -EFAULT;
}
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 4362b86ef84c..1281c6eb3a85 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -138,8 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size);
if (rc) {
- pr_warning(
- "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+ pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
start, start + size, rc);
return -EFAULT;
}
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 59c0766ae4e0..929d9ef7083f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
@@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm)
subpage_prot_init_new_context(mm);
+ pkey_mm_init(mm);
return index;
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index adb6364f4091..314d19ab9385 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -40,6 +40,7 @@
#include <asm/hvcall.h>
#include <asm/setup.h>
#include <asm/vdso.h>
+#include <asm/drmem.h>
static int numa_enabled = 1;
@@ -179,21 +180,6 @@ static const __be32 *of_get_associativity(struct device_node *dev)
return of_get_property(dev, "ibm,associativity", NULL);
}
-/*
- * Returns the property linux,drconf-usable-memory if
- * it exists (the property exists only in kexec/kdump kernels,
- * added by kexec-tools)
- */
-static const __be32 *of_get_usable_memory(struct device_node *memory)
-{
- const __be32 *prop;
- u32 len;
- prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
- if (!prop || len < sizeof(unsigned int))
- return NULL;
- return prop;
-}
-
int __node_distance(int a, int b)
{
int i;
@@ -387,69 +373,6 @@ static unsigned long read_n_cells(int n, const __be32 **buf)
return result;
}
-/*
- * Read the next memblock list entry from the ibm,dynamic-memory property
- * and return the information in the provided of_drconf_cell structure.
- */
-static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp)
-{
- const __be32 *cp;
-
- drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
-
- cp = *cellp;
- drmem->drc_index = of_read_number(cp, 1);
- drmem->reserved = of_read_number(&cp[1], 1);
- drmem->aa_index = of_read_number(&cp[2], 1);
- drmem->flags = of_read_number(&cp[3], 1);
-
- *cellp = cp + 4;
-}
-
-/*
- * Retrieve and validate the ibm,dynamic-memory property of the device tree.
- *
- * The layout of the ibm,dynamic-memory property is a number N of memblock
- * list entries followed by N memblock list entries. Each memblock list entry
- * contains information as laid out in the of_drconf_cell struct above.
- */
-static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
-{
- const __be32 *prop;
- u32 len, entries;
-
- prop = of_get_property(memory, "ibm,dynamic-memory", &len);
- if (!prop || len < sizeof(unsigned int))
- return 0;
-
- entries = of_read_number(prop++, 1);
-
- /* Now that we know the number of entries, revalidate the size
- * of the property read in to ensure we have everything
- */
- if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
- return 0;
-
- *dm = prop;
- return entries;
-}
-
-/*
- * Retrieve and validate the ibm,lmb-size property for drconf memory
- * from the device tree.
- */
-static u64 of_get_lmb_size(struct device_node *memory)
-{
- const __be32 *prop;
- u32 len;
-
- prop = of_get_property(memory, "ibm,lmb-size", &len);
- if (!prop || len < sizeof(unsigned int))
- return 0;
-
- return read_n_cells(n_mem_size_cells, &prop);
-}
-
struct assoc_arrays {
u32 n_arrays;
u32 array_sz;
@@ -466,19 +389,27 @@ struct assoc_arrays {
* indicating the size of each associativity array, followed by a list
* of N associativity arrays.
*/
-static int of_get_assoc_arrays(struct device_node *memory,
- struct assoc_arrays *aa)
+static int of_get_assoc_arrays(struct assoc_arrays *aa)
{
+ struct device_node *memory;
const __be32 *prop;
u32 len;
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!memory)
+ return -1;
+
prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
- if (!prop || len < 2 * sizeof(unsigned int))
+ if (!prop || len < 2 * sizeof(unsigned int)) {
+ of_node_put(memory);
return -1;
+ }
aa->n_arrays = of_read_number(prop++, 1);
aa->array_sz = of_read_number(prop++, 1);
+ of_node_put(memory);
+
/* Now that we know the number of arrays and size of each array,
* revalidate the size of the property read in.
*/
@@ -493,26 +424,30 @@ static int of_get_assoc_arrays(struct device_node *memory,
* This is like of_node_to_nid_single() for memory represented in the
* ibm,dynamic-reconfiguration-memory node.
*/
-static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
- struct assoc_arrays *aa)
+static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
+ struct assoc_arrays aa = { .arrays = NULL };
int default_nid = 0;
int nid = default_nid;
- int index;
+ int rc, index;
+
+ rc = of_get_assoc_arrays(&aa);
+ if (rc)
+ return default_nid;
- if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
- !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
- drmem->aa_index < aa->n_arrays) {
- index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
- nid = of_read_number(&aa->arrays[index], 1);
+ if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
+ !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
+ lmb->aa_index < aa.n_arrays) {
+ index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
+ nid = of_read_number(&aa.arrays[index], 1);
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = default_nid;
if (nid > 0) {
- index = drmem->aa_index * aa->array_sz;
+ index = lmb->aa_index * aa.array_sz;
initialize_distance_lookup_table(nid,
- &aa->arrays[index]);
+ &aa.arrays[index]);
}
}
@@ -551,7 +486,7 @@ static int numa_setup_cpu(unsigned long lcpu)
nid = of_node_to_nid_single(cpu);
out_present:
- if (nid < 0 || !node_online(nid))
+ if (nid < 0 || !node_possible(nid))
nid = first_online_node;
map_cpu_to_node(lcpu, nid);
@@ -645,67 +580,48 @@ static inline int __init read_usm_ranges(const __be32 **usm)
* Extract NUMA information from the ibm,dynamic-reconfiguration-memory
* node. This assumes n_mem_{addr,size}_cells have been set.
*/
-static void __init parse_drconf_memory(struct device_node *memory)
+static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm)
{
- const __be32 *uninitialized_var(dm), *usm;
- unsigned int n, rc, ranges, is_kexec_kdump = 0;
- unsigned long lmb_size, base, size, sz;
+ unsigned int ranges, is_kexec_kdump = 0;
+ unsigned long base, size, sz;
int nid;
- struct assoc_arrays aa = { .arrays = NULL };
-
- n = of_get_drconf_memory(memory, &dm);
- if (!n)
- return;
-
- lmb_size = of_get_lmb_size(memory);
- if (!lmb_size)
- return;
- rc = of_get_assoc_arrays(memory, &aa);
- if (rc)
+ /*
+ * Skip this block if the reserved bit is set in flags (0x80)
+ * or if the block is not assigned to this partition (0x8)
+ */
+ if ((lmb->flags & DRCONF_MEM_RESERVED)
+ || !(lmb->flags & DRCONF_MEM_ASSIGNED))
return;
- /* check if this is a kexec/kdump kernel */
- usm = of_get_usable_memory(memory);
- if (usm != NULL)
+ if (*usm)
is_kexec_kdump = 1;
- for (; n != 0; --n) {
- struct of_drconf_cell drmem;
-
- read_drconf_cell(&drmem, &dm);
+ base = lmb->base_addr;
+ size = drmem_lmb_size();
+ ranges = 1;
- /* skip this block if the reserved bit is set in flags (0x80)
- or if the block is not assigned to this partition (0x8) */
- if ((drmem.flags & DRCONF_MEM_RESERVED)
- || !(drmem.flags & DRCONF_MEM_ASSIGNED))
- continue;
-
- base = drmem.base_addr;
- size = lmb_size;
- ranges = 1;
+ if (is_kexec_kdump) {
+ ranges = read_usm_ranges(usm);
+ if (!ranges) /* there are no (base, size) duple */
+ return;
+ }
+ do {
if (is_kexec_kdump) {
- ranges = read_usm_ranges(&usm);
- if (!ranges) /* there are no (base, size) duple */
- continue;
+ base = read_n_cells(n_mem_addr_cells, usm);
+ size = read_n_cells(n_mem_size_cells, usm);
}
- do {
- if (is_kexec_kdump) {
- base = read_n_cells(n_mem_addr_cells, &usm);
- size = read_n_cells(n_mem_size_cells, &usm);
- }
- nid = of_drconf_to_nid_single(&drmem, &aa);
- fake_numa_create_new_node(
- ((base + size) >> PAGE_SHIFT),
- &nid);
- node_set_online(nid);
- sz = numa_enforce_memory_limit(base, size);
- if (sz)
- memblock_set_node(base, sz,
- &memblock.memory, nid);
- } while (--ranges);
- }
+
+ nid = of_drconf_to_nid_single(lmb);
+ fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
+ &nid);
+ node_set_online(nid);
+ sz = numa_enforce_memory_limit(base, size);
+ if (sz)
+ memblock_set_node(base, sz, &memblock.memory, nid);
+ } while (--ranges);
}
static int __init parse_numa_properties(void)
@@ -800,8 +716,10 @@ new_range:
* ibm,dynamic-reconfiguration-memory node.
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (memory)
- parse_drconf_memory(memory);
+ if (memory) {
+ walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+ of_node_put(memory);
+ }
return 0;
}
@@ -892,6 +810,32 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
+static void __init find_possible_nodes(void)
+{
+ struct device_node *rtas;
+ u32 numnodes, i;
+
+ if (min_common_depth <= 0)
+ return;
+
+ rtas = of_find_node_by_path("/rtas");
+ if (!rtas)
+ return;
+
+ if (of_property_read_u32_index(rtas,
+ "ibm,max-associativity-domains",
+ min_common_depth, &numnodes))
+ goto out;
+
+ for (i = 0; i < numnodes; i++) {
+ if (!node_possible(i))
+ node_set(i, node_possible_map);
+ }
+
+out:
+ of_node_put(rtas);
+}
+
void __init initmem_init(void)
{
int nid, cpu;
@@ -905,12 +849,15 @@ void __init initmem_init(void)
memblock_dump_all();
/*
- * Reduce the possible NUMA nodes to the online NUMA nodes,
- * since we do not support node hotplug. This ensures that we
- * lower the maximum NUMA node ID to what is actually present.
+ * Modify the set of possible NUMA nodes to reflect information
+ * available about the set of online nodes, and the set of nodes
+ * that we expect to make use of for this platform's affinity
+ * calculations.
*/
nodes_and(node_possible_map, node_possible_map, node_online_map);
+ find_possible_nodes();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
@@ -979,43 +926,26 @@ early_param("topology_updates", early_topology_updates);
* memory represented in the device tree by the property
* ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
*/
-static int hot_add_drconf_scn_to_nid(struct device_node *memory,
- unsigned long scn_addr)
+static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
{
- const __be32 *dm;
- unsigned int drconf_cell_cnt, rc;
+ struct drmem_lmb *lmb;
unsigned long lmb_size;
- struct assoc_arrays aa;
int nid = -1;
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- if (!drconf_cell_cnt)
- return -1;
-
- lmb_size = of_get_lmb_size(memory);
- if (!lmb_size)
- return -1;
-
- rc = of_get_assoc_arrays(memory, &aa);
- if (rc)
- return -1;
-
- for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
- struct of_drconf_cell drmem;
-
- read_drconf_cell(&drmem, &dm);
+ lmb_size = drmem_lmb_size();
+ for_each_drmem_lmb(lmb) {
/* skip this block if it is reserved or not assigned to
* this partition */
- if ((drmem.flags & DRCONF_MEM_RESERVED)
- || !(drmem.flags & DRCONF_MEM_ASSIGNED))
+ if ((lmb->flags & DRCONF_MEM_RESERVED)
+ || !(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;
- if ((scn_addr < drmem.base_addr)
- || (scn_addr >= (drmem.base_addr + lmb_size)))
+ if ((scn_addr < lmb->base_addr)
+ || (scn_addr >= (lmb->base_addr + lmb_size)))
continue;
- nid = of_drconf_to_nid_single(&drmem, &aa);
+ nid = of_drconf_to_nid_single(lmb);
break;
}
@@ -1080,7 +1010,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
+ nid = hot_add_drconf_scn_to_nid(scn_addr);
of_node_put(memory);
} else {
nid = hot_add_node_scn_to_nid(scn_addr);
@@ -1096,11 +1026,7 @@ static u64 hot_add_drconf_memory_max(void)
{
struct device_node *memory = NULL;
struct device_node *dn = NULL;
- unsigned int drconf_cell_cnt = 0;
- u64 lmb_size = 0;
- const __be32 *dm = NULL;
const __be64 *lrdr = NULL;
- struct of_drconf_cell drmem;
dn = of_find_node_by_path("/rtas");
if (dn) {
@@ -1112,14 +1038,8 @@ static u64 hot_add_drconf_memory_max(void)
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- lmb_size = of_get_lmb_size(memory);
-
- /* Advance to the last cell, each cell has 6 32 bit integers */
- dm += (drconf_cell_cnt - 1) * 6;
- read_drconf_cell(&drmem, &dm);
of_node_put(memory);
- return drmem.base_addr + lmb_size;
+ return drmem_lmb_memory_max();
}
return 0;
}
@@ -1278,6 +1198,42 @@ static long vphn_get_associativity(unsigned long cpu,
return rc;
}
+int find_and_online_cpu_nid(int cpu)
+{
+ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ int new_nid;
+
+ /* Use associativity from first thread for all siblings */
+ vphn_get_associativity(cpu, associativity);
+ new_nid = associativity_to_nid(associativity);
+ if (new_nid < 0 || !node_possible(new_nid))
+ new_nid = first_online_node;
+
+ if (NODE_DATA(new_nid) == NULL) {
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Need to ensure that NODE_DATA is initialized for a node from
+ * available memory (see memblock_alloc_try_nid). If unable to
+ * init the node, then default to nearest node that has memory
+ * installed.
+ */
+ if (try_online_node(new_nid))
+ new_nid = first_online_node;
+#else
+ /*
+ * Default to using the nearest node that has memory installed.
+ * Otherwise, it would be necessary to patch the kernel MM code
+ * to deal with more memoryless-node error conditions.
+ */
+ new_nid = first_online_node;
+#endif
+ }
+
+ pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
+ cpu, new_nid);
+ return new_nid;
+}
+
/*
* Update the CPU maps and sysfs entries for a single CPU when its NUMA
* characteristics change. This function doesn't perform any locking and is
@@ -1345,7 +1301,6 @@ int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
- __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
cpumask_t updated_cpus;
struct device *dev;
int weight, new_nid, i = 0;
@@ -1383,11 +1338,7 @@ int numa_update_cpu_topology(bool cpus_locked)
continue;
}
- /* Use associativity from first thread for all siblings */
- vphn_get_associativity(cpu, associativity);
- new_nid = associativity_to_nid(associativity);
- if (new_nid < 0 || !node_online(new_nid))
- new_nid = first_online_node;
+ new_nid = find_and_online_cpu_nid(cpu);
if (new_nid == numa_cpu_lookup_table[cpu]) {
cpumask_andnot(&cpu_associativity_changes_mask,
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index cfbbee941a76..573a9a2ee455 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -579,6 +579,9 @@ void __init radix__early_init_mmu(void)
radix_init_iamr();
radix_init_pgtable();
+
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
void radix__early_init_mmu_secondary(void)
@@ -600,6 +603,9 @@ void radix__early_init_mmu_secondary(void)
radix_init_amor();
}
radix_init_iamr();
+
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
void radix__mmu_cleanup_all(void)
@@ -622,22 +628,11 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);
+
/*
- * We limit the allocation that depend on ppc64_rma_size
- * to first_memblock_size. We also clamp it to 1GB to
- * avoid some funky things such as RTAS bugs.
- *
- * On radix config we really don't have a limitation
- * on real mode access. But keeping it as above works
- * well enough.
- */
- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
- /*
- * Finally limit subsequent allocations. We really don't want
- * to limit the memblock allocations to rma_size. FIXME!! should
- * we even limit at all ?
+ * Radix mode is not limited by RMA / VRMA addressing.
*/
- memblock_set_current_limit(first_memblock_base + first_memblock_size);
+ ppc64_rma_size = ULONG_MAX;
}
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index a03ff3d99e0c..9f361ae571e9 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -54,7 +54,8 @@ static inline int pte_looks_normal(pte_t pte)
return 0;
#else
return (pte_val(pte) &
- (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
+ (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
+ _PAGE_PRIVILEGED)) ==
(_PAGE_PRESENT | _PAGE_USER);
#endif
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index f6c7f54c0515..d35d9ad3c1cd 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -98,14 +98,7 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
flags &= ~(_PAGE_USER | _PAGE_EXEC);
-
-#ifdef _PAGE_BAP_SR
- /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
- * which means that we just cleared supervisor access... oops ;-) This
- * restores it
- */
- flags |= _PAGE_BAP_SR;
-#endif
+ flags |= _PAGE_PRIVILEGED;
return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 813ea22c3e00..c9a623c2d8a2 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -244,20 +244,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
/*
* Force kernel mapping.
*/
-#if defined(CONFIG_PPC_BOOK3S_64)
- flags |= _PAGE_PRIVILEGED;
-#else
flags &= ~_PAGE_USER;
-#endif
-
-
-#ifdef _PAGE_BAP_SR
- /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
- * which means that we just cleared supervisor access... oops ;-) This
- * restores it
- */
- flags |= _PAGE_BAP_SR;
-#endif
+ flags |= _PAGE_PRIVILEGED;
if (ppc_md.ioremap)
return ppc_md.ioremap(addr, size, flags, caller);
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
new file mode 100644
index 000000000000..ba71c5481f42
--- /dev/null
+++ b/arch/powerpc/mm/pkeys.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#include <asm/mman.h>
+#include <asm/setup.h>
+#include <linux/pkeys.h>
+#include <linux/of_device.h>
+
+DEFINE_STATIC_KEY_TRUE(pkey_disabled);
+bool pkey_execute_disable_supported;
+int pkeys_total; /* Total pkeys as per device tree */
+bool pkeys_devtree_defined; /* pkey property exported by device tree */
+u32 initial_allocation_mask; /* Bits set for reserved keys */
+u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */
+u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
+
+#define AMR_BITS_PER_PKEY 2
+#define AMR_RD_BIT 0x1UL
+#define AMR_WR_BIT 0x2UL
+#define IAMR_EX_BIT 0x1UL
+#define PKEY_REG_BITS (sizeof(u64)*8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
+
+static void scan_pkey_feature(void)
+{
+ u32 vals[2];
+ struct device_node *cpu;
+
+ cpu = of_find_node_by_type(NULL, "cpu");
+ if (!cpu)
+ return;
+
+ if (of_property_read_u32_array(cpu,
+ "ibm,processor-storage-keys", vals, 2))
+ return;
+
+ /*
+ * Since any pkey can be used for data or execute, we will just treat
+ * all keys as equal and track them as one entity.
+ */
+ pkeys_total = be32_to_cpu(vals[0]);
+ pkeys_devtree_defined = true;
+}
+
+static inline bool pkey_mmu_enabled(void)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ return pkeys_total;
+ else
+ return cpu_has_feature(CPU_FTR_PKEY);
+}
+
+int pkey_initialize(void)
+{
+ int os_reserved, i;
+
+ /*
+ * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
+ * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE.
+ * Ensure that the bits a distinct.
+ */
+ BUILD_BUG_ON(PKEY_DISABLE_EXECUTE &
+ (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ /*
+ * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous
+ * in the vmaflag. Make sure that is really the case.
+ */
+ BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) +
+ __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
+ != (sizeof(u64) * BITS_PER_BYTE));
+
+ /* scan the device tree for pkey feature */
+ scan_pkey_feature();
+
+ /*
+ * Let's assume 32 pkeys on P8 bare metal, if its not defined by device
+ * tree. We make this exception since skiboot forgot to expose this
+ * property on power8.
+ */
+ if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
+ cpu_has_feature(CPU_FTRS_POWER8))
+ pkeys_total = 32;
+
+ /*
+ * Adjust the upper limit, based on the number of bits supported by
+ * arch-neutral code.
+ */
+ pkeys_total = min_t(int, pkeys_total,
+ (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT));
+
+ if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
+ static_branch_enable(&pkey_disabled);
+ else
+ static_branch_disable(&pkey_disabled);
+
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+
+ /*
+ * The device tree cannot be relied to indicate support for
+ * execute_disable support. Instead we use a PVR check.
+ */
+ if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
+ pkey_execute_disable_supported = false;
+ else
+ pkey_execute_disable_supported = true;
+
+#ifdef CONFIG_PPC_4K_PAGES
+ /*
+ * The OS can manage only 8 pkeys due to its inability to represent them
+ * in the Linux 4K PTE.
+ */
+ os_reserved = pkeys_total - 8;
+#else
+ os_reserved = 0;
+#endif
+ /*
+ * Bits are in LE format. NOTE: 1, 0 are reserved.
+ * key 0 is the default key, which allows read/write/execute.
+ * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
+ * programming note.
+ */
+ initial_allocation_mask = ~0x0;
+
+ /* register mask is in BE format */
+ pkey_amr_uamor_mask = ~0x0ul;
+ pkey_iamr_mask = ~0x0ul;
+
+ for (i = 2; i < (pkeys_total - os_reserved); i++) {
+ initial_allocation_mask &= ~(0x1 << i);
+ pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
+ pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
+ }
+ return 0;
+}
+
+arch_initcall(pkey_initialize);
+
+void pkey_mm_init(struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+ mm_pkey_allocation_map(mm) = initial_allocation_mask;
+ /* -1 means unallocated or invalid */
+ mm->context.execute_only_pkey = -1;
+}
+
+static inline u64 read_amr(void)
+{
+ return mfspr(SPRN_AMR);
+}
+
+static inline void write_amr(u64 value)
+{
+ mtspr(SPRN_AMR, value);
+}
+
+static inline u64 read_iamr(void)
+{
+ if (!likely(pkey_execute_disable_supported))
+ return 0x0UL;
+
+ return mfspr(SPRN_IAMR);
+}
+
+static inline void write_iamr(u64 value)
+{
+ if (!likely(pkey_execute_disable_supported))
+ return;
+
+ mtspr(SPRN_IAMR, value);
+}
+
+static inline u64 read_uamor(void)
+{
+ return mfspr(SPRN_UAMOR);
+}
+
+static inline void write_uamor(u64 value)
+{
+ mtspr(SPRN_UAMOR, value);
+}
+
+static bool is_pkey_enabled(int pkey)
+{
+ u64 uamor = read_uamor();
+ u64 pkey_bits = 0x3ul << pkeyshift(pkey);
+ u64 uamor_pkey_bits = (uamor & pkey_bits);
+
+ /*
+ * Both the bits in UAMOR corresponding to the key should be set or
+ * reset.
+ */
+ WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
+ return !!(uamor_pkey_bits);
+}
+
+static inline void init_amr(int pkey, u8 init_bits)
+{
+ u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
+ u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
+
+ write_amr(old_amr | new_amr_bits);
+}
+
+static inline void init_iamr(int pkey, u8 init_bits)
+{
+ u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
+ u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
+
+ write_iamr(old_iamr | new_iamr_bits);
+}
+
+static void pkey_status_change(int pkey, bool enable)
+{
+ u64 old_uamor;
+
+ /* Reset the AMR and IAMR bits for this key */
+ init_amr(pkey, 0x0);
+ init_iamr(pkey, 0x0);
+
+ /* Enable/disable key */
+ old_uamor = read_uamor();
+ if (enable)
+ old_uamor |= (0x3ul << pkeyshift(pkey));
+ else
+ old_uamor &= ~(0x3ul << pkeyshift(pkey));
+ write_uamor(old_uamor);
+}
+
+void __arch_activate_pkey(int pkey)
+{
+ pkey_status_change(pkey, true);
+}
+
+void __arch_deactivate_pkey(int pkey)
+{
+ pkey_status_change(pkey, false);
+}
+
+/*
+ * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that
+ * specified in @init_val.
+ */
+int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ u64 new_amr_bits = 0x0ul;
+ u64 new_iamr_bits = 0x0ul;
+
+ if (!is_pkey_enabled(pkey))
+ return -EINVAL;
+
+ if (init_val & PKEY_DISABLE_EXECUTE) {
+ if (!pkey_execute_disable_supported)
+ return -EINVAL;
+ new_iamr_bits |= IAMR_EX_BIT;
+ }
+ init_iamr(pkey, new_iamr_bits);
+
+ /* Set the bits we need in AMR: */
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT;
+ else if (init_val & PKEY_DISABLE_WRITE)
+ new_amr_bits |= AMR_WR_BIT;
+
+ init_amr(pkey, new_amr_bits);
+ return 0;
+}
+
+void thread_pkey_regs_save(struct thread_struct *thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /*
+ * TODO: Skip saving registers if @thread hasn't used any keys yet.
+ */
+ thread->amr = read_amr();
+ thread->iamr = read_iamr();
+ thread->uamor = read_uamor();
+}
+
+void thread_pkey_regs_restore(struct thread_struct *new_thread,
+ struct thread_struct *old_thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /*
+ * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet.
+ */
+ if (old_thread->amr != new_thread->amr)
+ write_amr(new_thread->amr);
+ if (old_thread->iamr != new_thread->iamr)
+ write_iamr(new_thread->iamr);
+ if (old_thread->uamor != new_thread->uamor)
+ write_uamor(new_thread->uamor);
+}
+
+void thread_pkey_regs_init(struct thread_struct *thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ write_amr(read_amr() & pkey_amr_uamor_mask);
+ write_iamr(read_iamr() & pkey_iamr_mask);
+ write_uamor(read_uamor() & pkey_amr_uamor_mask);
+}
+
+static inline bool pkey_allows_readwrite(int pkey)
+{
+ int pkey_shift = pkeyshift(pkey);
+
+ if (!is_pkey_enabled(pkey))
+ return true;
+
+ return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
+}
+
+int __execute_only_pkey(struct mm_struct *mm)
+{
+ bool need_to_set_mm_pkey = false;
+ int execute_only_pkey = mm->context.execute_only_pkey;
+ int ret;
+
+ /* Do we need to assign a pkey for mm's execute-only maps? */
+ if (execute_only_pkey == -1) {
+ /* Go allocate one to use, which might fail */
+ execute_only_pkey = mm_pkey_alloc(mm);
+ if (execute_only_pkey < 0)
+ return -1;
+ need_to_set_mm_pkey = true;
+ }
+
+ /*
+ * We do not want to go through the relatively costly dance to set AMR
+ * if we do not need to. Check it first and assume that if the
+ * execute-only pkey is readwrite-disabled than we do not have to set it
+ * ourselves.
+ */
+ if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey))
+ return execute_only_pkey;
+
+ /*
+ * Set up AMR so that it denies access for everything other than
+ * execution.
+ */
+ ret = __arch_set_user_pkey_access(current, execute_only_pkey,
+ PKEY_DISABLE_ACCESS |
+ PKEY_DISABLE_WRITE);
+ /*
+ * If the AMR-set operation failed somehow, just return 0 and
+ * effectively disable execute-only support.
+ */
+ if (ret) {
+ mm_pkey_free(mm, execute_only_pkey);
+ return -1;
+ }
+
+ /* We got one, store it and use it from here on out */
+ if (need_to_set_mm_pkey)
+ mm->context.execute_only_pkey = execute_only_pkey;
+ return execute_only_pkey;
+}
+
+static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
+{
+ /* Do this check first since the vm_flags should be hot */
+ if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
+ return false;
+
+ return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey);
+}
+
+/*
+ * This should only be called for *plain* mprotect calls.
+ */
+int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
+ int pkey)
+{
+ /*
+ * If the currently associated pkey is execute-only, but the requested
+ * protection requires read or write, move it back to the default pkey.
+ */
+ if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE)))
+ return 0;
+
+ /*
+ * The requested protection is execute-only. Hence let's use an
+ * execute-only pkey.
+ */
+ if (prot == PROT_EXEC) {
+ pkey = execute_only_pkey(vma->vm_mm);
+ if (pkey > 0)
+ return pkey;
+ }
+
+ /* Nothing to override. */
+ return vma_pkey(vma);
+}
+
+static bool pkey_access_permitted(int pkey, bool write, bool execute)
+{
+ int pkey_shift;
+ u64 amr;
+
+ if (!pkey)
+ return true;
+
+ if (!is_pkey_enabled(pkey))
+ return true;
+
+ pkey_shift = pkeyshift(pkey);
+ if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift)))
+ return true;
+
+ amr = read_amr(); /* Delay reading amr until absolutely needed */
+ return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) ||
+ (write && !(amr & (AMR_WR_BIT << pkey_shift))));
+}
+
+bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return true;
+
+ return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
+}
+
+/*
+ * We only want to enforce protection keys on the current thread because we
+ * effectively have no access to AMR/IAMR for other threads or any way to tell
+ * which AMR/IAMR in a threaded process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current mm, or if we are
+ * in a kernel thread.
+ */
+static inline bool vma_is_foreign(struct vm_area_struct *vma)
+{
+ if (!current->mm)
+ return true;
+
+ /* if it is not our ->mm, it has to be foreign */
+ if (current->mm != vma->vm_mm)
+ return true;
+
+ return false;
+}
+
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return true;
+ /*
+ * Do not enforce our key-permissions on a foreign vma.
+ */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return pkey_access_permitted(vma_pkey(vma), write, execute);
+}
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 781532d7bc4d..f14a07c2fb90 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
unsigned long next, limit;
int err;
+ if (radix_enabled())
+ return -ENOENT;
+
/* Check parameters */
if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
addr >= mm->task_size || len >= mm->task_size ||
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 884f4b705b57..71d1b19ad1c0 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -23,6 +23,72 @@
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
+/*
+ * tlbiel instruction for radix, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+ unsigned int r = 1; /* radix format */
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+ : "memory");
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the first set of the TLB, and the entire Page Walk Cache
+ * and partition table entries. Then flush the remaining sets of the
+ * TLB.
+ */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+
+ /* Do the same for process scoped entries. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
+
+ asm volatile("ptesync": : :"memory");
+}
+
+void radix__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
+ else
+ WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
+
+ asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
static inline void __tlbiel_pid(unsigned long pid, int set,
unsigned long ric)
{
@@ -600,14 +666,12 @@ void radix__flush_tlb_all(void)
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
/*
* now flush host entires by passing PRS = 0 and LPID == 0
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
- trace_tlbie(0, 0, rb, 0, ric, prs, r);
}
void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index bfc4a0869609..15fe5f0c8665 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -388,7 +388,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
- flush_tlb_mm(vma->vm_mm);
+ if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK))
+ flush_tlb_page(vma, start);
+ else
+ flush_tlb_mm(vma->vm_mm);
}
EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 3c39f05f0af3..6c0020d1c561 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
+#include <asm/code-patching.h>
#define PERF_8xx_ID_CPU_CYCLES 1
#define PERF_8xx_ID_HW_INSTRUCTIONS 2
@@ -30,8 +31,13 @@
extern unsigned long itlb_miss_counter, dtlb_miss_counter;
extern atomic_t instruction_counter;
+extern unsigned int itlb_miss_perf, dtlb_miss_perf;
+extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
+extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
static atomic_t insn_ctr_ref;
+static atomic_t itlb_miss_ref;
+static atomic_t dtlb_miss_ref;
static s64 get_insn_ctr(void)
{
@@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
val = get_insn_ctr();
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
+ if (atomic_inc_return(&itlb_miss_ref) == 1) {
+ unsigned long target = (unsigned long)&itlb_miss_perf;
+
+ patch_branch(&itlb_miss_exit_1, target, 0);
+#ifndef CONFIG_PIN_TLB_TEXT
+ patch_branch(&itlb_miss_exit_2, target, 0);
+#endif
+ }
val = itlb_miss_counter;
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
+ if (atomic_inc_return(&dtlb_miss_ref) == 1) {
+ unsigned long target = (unsigned long)&dtlb_miss_perf;
+
+ patch_branch(&dtlb_miss_exit_1, target, 0);
+ patch_branch(&dtlb_miss_exit_2, target, 0);
+ patch_branch(&dtlb_miss_exit_3, target, 0);
+ }
val = dtlb_miss_counter;
break;
}
@@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
+ /* mfspr r10, SPRN_SPRG_SCRATCH0 */
+ unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
+ __PPC_SPR(SPRN_SPRG_SCRATCH0);
+
mpc8xx_pmu_read(event);
- if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS)
- return;
/* If it was the last user, stop counting to avoid useles overhead */
- if (atomic_dec_return(&insn_ctr_ref) == 0)
- mtspr(SPRN_ICTRL, 7);
+ switch (event_type(event)) {
+ case PERF_8xx_ID_CPU_CYCLES:
+ break;
+ case PERF_8xx_ID_HW_INSTRUCTIONS:
+ if (atomic_dec_return(&insn_ctr_ref) == 0)
+ mtspr(SPRN_ICTRL, 7);
+ break;
+ case PERF_8xx_ID_ITLB_LOAD_MISS:
+ if (atomic_dec_return(&itlb_miss_ref) == 0) {
+ patch_instruction(&itlb_miss_exit_1, insn);
+#ifndef CONFIG_PIN_TLB_TEXT
+ patch_instruction(&itlb_miss_exit_2, insn);
+#endif
+ }
+ break;
+ case PERF_8xx_ID_DTLB_LOAD_MISS:
+ if (atomic_dec_return(&dtlb_miss_ref) == 0) {
+ patch_instruction(&dtlb_miss_exit_1, insn);
+ patch_instruction(&dtlb_miss_exit_2, insn);
+ patch_instruction(&dtlb_miss_exit_3, insn);
+ }
+ break;
+ }
}
static struct pmu mpc8xx_pmu = {
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 225c9c86d7c0..57ebc655d2ac 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
-obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o
+obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index fce545774d50..f89bbd54ecec 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
*/
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
- return !regs->softe;
+ return (regs->softe & IRQS_DISABLED);
}
/*
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index be4e7f84f70a..d7532e7b9ab5 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -40,7 +40,6 @@ static struct imc_pmu *core_imc_pmu;
/* Thread IMC data structures and variables */
static DEFINE_PER_CPU(u64 *, thread_imc_mem);
-static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
@@ -117,17 +116,13 @@ static struct attribute *device_str_attr_create(const char *name, const char *st
return &attr->attr.attr;
}
-struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
- const char *unit, const char *prefix, u32 base)
+static int imc_parse_event(struct device_node *np, const char *scale,
+ const char *unit, const char *prefix,
+ u32 base, struct imc_events *event)
{
- struct imc_events *event;
const char *s;
u32 reg;
- event = kzalloc(sizeof(struct imc_events), GFP_KERNEL);
- if (!event)
- return NULL;
-
if (of_property_read_u32(np, "reg", &reg))
goto error;
/* Add the base_reg value to the "reg" */
@@ -158,14 +153,32 @@ struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
goto error;
}
- return event;
+ return 0;
error:
kfree(event->unit);
kfree(event->scale);
kfree(event->name);
- kfree(event);
+ return -EINVAL;
+}
+
+/*
+ * imc_free_events: Function to cleanup the events list, having
+ * "nr_entries".
+ */
+static void imc_free_events(struct imc_events *events, int nr_entries)
+{
+ int i;
- return NULL;
+ /* Nothing to clean, return */
+ if (!events)
+ return;
+ for (i = 0; i < nr_entries; i++) {
+ kfree(events[i].unit);
+ kfree(events[i].scale);
+ kfree(events[i].name);
+ }
+
+ kfree(events);
}
/*
@@ -177,9 +190,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
struct attribute_group *attr_group;
struct attribute **attrs, *dev_str;
struct device_node *np, *pmu_events;
- struct imc_events *ev;
u32 handle, base_reg;
- int i=0, j=0, ct;
+ int i = 0, j = 0, ct, ret;
const char *prefix, *g_scale, *g_unit;
const char *ev_val_str, *ev_scale_str, *ev_unit_str;
@@ -217,15 +229,17 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
ct = 0;
/* Parse the events and update the struct */
for_each_child_of_node(pmu_events, np) {
- ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg);
- if (ev)
- pmu->events[ct++] = ev;
+ ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
+ if (!ret)
+ ct++;
}
/* Allocate memory for attribute group */
attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
- if (!attr_group)
+ if (!attr_group) {
+ imc_free_events(pmu->events, ct);
return -ENOMEM;
+ }
/*
* Allocate memory for attributes.
@@ -238,31 +252,31 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
if (!attrs) {
kfree(attr_group);
- kfree(pmu->events);
+ imc_free_events(pmu->events, ct);
return -ENOMEM;
}
attr_group->name = "events";
attr_group->attrs = attrs;
do {
- ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value);
- dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str);
+ ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+ dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
if (!dev_str)
continue;
attrs[j++] = dev_str;
- if (pmu->events[i]->scale) {
- ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name);
- dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale);
+ if (pmu->events[i].scale) {
+ ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+ dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
if (!dev_str)
continue;
attrs[j++] = dev_str;
}
- if (pmu->events[i]->unit) {
- ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name);
- dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit);
+ if (pmu->events[i].unit) {
+ ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+ dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
if (!dev_str)
continue;
@@ -273,7 +287,6 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
/* Save the event attribute */
pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
- kfree(pmu->events);
return 0;
}
@@ -611,7 +624,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu)
static int ppc_core_imc_cpu_offline(unsigned int cpu)
{
- unsigned int ncpu, core_id;
+ unsigned int core_id;
+ int ncpu;
struct imc_pmu_ref *ref;
/*
@@ -1171,6 +1185,15 @@ static void cleanup_all_thread_imc_memory(void)
}
}
+/* Function to free the attr_groups which are dynamically allocated */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+ if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+ kfree(pmu_ptr);
+}
+
/*
* Common function to unregister cpu hotplug callback and
* free the memory.
@@ -1203,13 +1226,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
-
- /* Only free the attr_groups which are dynamically allocated */
- if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
- kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
- kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
- kfree(pmu_ptr);
- return;
}
@@ -1258,8 +1274,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
GFP_KERNEL);
- if (!core_imc_refc)
+ if (!core_imc_refc) {
+ kfree(pmu_ptr->mem_info);
return -ENOMEM;
+ }
core_imc_pmu = pmu_ptr;
break;
@@ -1272,11 +1290,12 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
thread_imc_mem_size = pmu_ptr->counter_mem_size;
for_each_online_cpu(cpu) {
res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
- if (res)
+ if (res) {
+ cleanup_all_thread_imc_memory();
return res;
+ }
}
- thread_imc_pmu = pmu_ptr;
break;
default:
return -EINVAL;
@@ -1300,8 +1319,10 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
int ret;
ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
- if (ret)
- goto err_free;
+ if (ret) {
+ imc_common_mem_free(pmu_ptr);
+ return ret;
+ }
switch (pmu_ptr->domain) {
case IMC_DOMAIN_NEST:
@@ -1368,6 +1389,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
return 0;
err_free:
+ imc_common_mem_free(pmu_ptr);
imc_common_cpuhp_mem_free(pmu_ptr);
return ret;
}
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
index 92e98048404f..04f0c73a9b4f 100644
--- a/arch/powerpc/platforms/44x/fsp2.c
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -27,6 +27,17 @@
#include <asm/time.h>
#include <asm/uic.h>
#include <asm/ppc4xx.h>
+#include <asm/dcr.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include "fsp2.h"
+
+#define FSP2_BUS_ERR "ibm,bus-error-irq"
+#define FSP2_CMU_ERR "ibm,cmu-error-irq"
+#define FSP2_CONF_ERR "ibm,conf-error-irq"
+#define FSP2_OPBD_ERR "ibm,opbd-error-irq"
+#define FSP2_MCUE "ibm,mc-ue-irq"
+#define FSP2_RST_WRN "ibm,reset-warning-irq"
static __initdata struct of_device_id fsp2_of_bus[] = {
{ .compatible = "ibm,plb4", },
@@ -35,6 +46,194 @@ static __initdata struct of_device_id fsp2_of_bus[] = {
{},
};
+static void l2regs(void)
+{
+ pr_err("L2 Controller:\n");
+ pr_err("MCK: 0x%08x\n", mfl2(L2MCK));
+ pr_err("INT: 0x%08x\n", mfl2(L2INT));
+ pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0));
+ pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1));
+ pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0));
+ pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1));
+ pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2));
+ pr_err("CPUSTAT: 0x%08x\n", mfl2(L2CPUSTAT));
+ pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0));
+ pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0));
+ pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1));
+ pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2));
+ pr_err("WDFSTAT: 0x%08x\n", mfl2(L2WDFSTAT));
+ pr_err("LOG0: 0x%08x\n", mfl2(L2LOG0));
+ pr_err("LOG1: 0x%08x\n", mfl2(L2LOG1));
+ pr_err("LOG2: 0x%08x\n", mfl2(L2LOG2));
+ pr_err("LOG3: 0x%08x\n", mfl2(L2LOG3));
+ pr_err("LOG4: 0x%08x\n", mfl2(L2LOG4));
+ pr_err("LOG5: 0x%08x\n", mfl2(L2LOG5));
+}
+
+static void show_plbopb_regs(u32 base, int num)
+{
+ pr_err("\nPLBOPB Bridge %d:\n", num);
+ pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0));
+ pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1));
+ pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2));
+ pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU));
+ pr_err("GEAR: 0x%08x\n", mfdcr(base + PLB4OPB_GEAR));
+}
+
+static irqreturn_t bus_err_handler(int irq, void *data)
+{
+ pr_err("Bus Error\n");
+
+ l2regs();
+
+ pr_err("\nPLB6 Controller:\n");
+ pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD));
+ pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR));
+
+ pr_err("\nPLB6-to-PLB4 Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR));
+ pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH));
+ pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL));
+
+ pr_err("\nPLB4-to-PLB6 Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR));
+ pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH));
+ pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL));
+
+ pr_err("\nPLB6-to-MCIF Bridge:\n");
+ pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0));
+ pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1));
+ pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH));
+ pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL));
+
+ pr_err("\nPLB4 Arbiter:\n");
+ pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH));
+ pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL));
+ pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+ pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+ pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH));
+ pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL));
+ pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+ pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+
+ show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0);
+ show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1);
+ show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2);
+ show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3);
+
+ pr_err("\nPLB4-to-AHB Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR));
+ pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR));
+ pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR));
+
+ pr_err("\nAHB-to-PLB4 Bridge:\n");
+ pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR));
+ pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR));
+ panic("Bus Error\n");
+}
+
+static irqreturn_t cmu_err_handler(int irq, void *data) {
+ pr_err("CMU Error\n");
+ pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0));
+ panic("CMU Error\n");
+}
+
+static irqreturn_t conf_err_handler(int irq, void *data) {
+ pr_err("Configuration Logic Error\n");
+ pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC));
+ pr_err("RPERR0: 0x%08x\n", mfdcr(DCRN_CONF_RPERR0));
+ pr_err("RPERR1: 0x%08x\n", mfdcr(DCRN_CONF_RPERR1));
+ panic("Configuration Logic Error\n");
+}
+
+static irqreturn_t opbd_err_handler(int irq, void *data) {
+ panic("OPBD Error\n");
+}
+
+static irqreturn_t mcue_handler(int irq, void *data) {
+ pr_err("DDR: Uncorrectable Error\n");
+ pr_err("MCSTAT: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT));
+ pr_err("MCOPT1: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1));
+ pr_err("MCOPT2: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2));
+ pr_err("PHYSTAT: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT));
+ pr_err("CFGR0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0));
+ pr_err("CFGR1: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1));
+ pr_err("CFGR2: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2));
+ pr_err("CFGR3: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3));
+ pr_err("SCRUB_CNTL: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL));
+ pr_err("ECCERR_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0));
+ pr_err("ECCERR_ADDR_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0));
+ pr_err("ECCERR_CNT_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0));
+ pr_err("ECC_CHECK_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0));
+ pr_err("MCER0: 0x%08x\n",
+ mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0));
+ pr_err("MCER1: 0x%08x\n",
+ mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1));
+ pr_err("BESR: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BESR0));
+ pr_err("BEARL: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BEARL));
+ pr_err("BEARH: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BEARH));
+ panic("DDR: Uncorrectable Error\n");
+}
+
+static irqreturn_t rst_wrn_handler(int irq, void *data) {
+ u32 crcs = mfcmu(CMUN_CRCS);
+ switch (crcs & CRCS_STAT_MASK) {
+ case CRCS_STAT_CHIP_RST_B:
+ panic("Received chassis-initiated reset request");
+ default:
+ panic("Unknown external reset: CRCS=0x%x", crcs);
+ }
+}
+
+static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
+{
+ struct device_node *np;
+ unsigned int irq;
+ int32_t rc;
+
+ for_each_compatible_node(np, NULL, compat) {
+ irq = irq_of_parse_and_map(np, 0);
+ if (irq == NO_IRQ) {
+ pr_err("device tree node %s is missing a interrupt",
+ np->name);
+ return;
+ }
+
+ rc = request_irq(irq, errirq_handler, 0, np->name, np);
+ if (rc) {
+ pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d",
+ np->full_name, rc);
+ return;
+ }
+ }
+}
+
+static void critical_irq_setup(void)
+{
+ node_irq_request(FSP2_CMU_ERR, cmu_err_handler);
+ node_irq_request(FSP2_BUS_ERR, bus_err_handler);
+ node_irq_request(FSP2_CONF_ERR, conf_err_handler);
+ node_irq_request(FSP2_OPBD_ERR, opbd_err_handler);
+ node_irq_request(FSP2_MCUE, mcue_handler);
+ node_irq_request(FSP2_RST_WRN, rst_wrn_handler);
+}
+
static int __init fsp2_device_probe(void)
{
of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
@@ -44,18 +243,76 @@ machine_device_initcall(fsp2, fsp2_device_probe);
static int __init fsp2_probe(void)
{
+ u32 val;
unsigned long root = of_get_flat_dt_root();
if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
return 0;
+
+ /* Clear BC_ERR and mask snoopable request plb errors. */
+ val = mfdcr(DCRN_PLB6_CR0);
+ val |= 0x20000000;
+ mtdcr(DCRN_PLB6_BASE, val);
+ mtdcr(DCRN_PLB6_HD, 0xffff0000);
+ mtdcr(DCRN_PLB6_SHD, 0xffff0000);
+
+ /* TVSENSE reset is blocked (clock gated) by the POR default of the TVS
+ * sleep config bit. As a consequence, TVSENSE will provide erratic
+ * sensor values, which may result in spurious (parity) errors
+ * recorded in the CMU FIR and leading to erroneous interrupt requests
+ * once the CMU interrupt is unmasked.
+ */
+
+ /* 1. set TVS1[UNDOZE] */
+ val = mfcmu(CMUN_TVS1);
+ val |= 0x4;
+ mtcmu(CMUN_TVS1, val);
+
+ /* 2. clear FIR[TVS] and FIR[TVSPAR] */
+ val = mfcmu(CMUN_FIR0);
+ val |= 0x30000000;
+ mtcmu(CMUN_FIR0, val);
+
+ /* L2 machine checks */
+ mtl2(L2PLBMCKEN0, 0xffffffff);
+ mtl2(L2PLBMCKEN1, 0x0000ffff);
+ mtl2(L2ARRMCKEN0, 0xffffffff);
+ mtl2(L2ARRMCKEN1, 0xffffffff);
+ mtl2(L2ARRMCKEN2, 0xfffff000);
+ mtl2(L2CPUMCKEN, 0xffffffff);
+ mtl2(L2RACMCKEN0, 0xffffffff);
+ mtl2(L2WACMCKEN0, 0xffffffff);
+ mtl2(L2WACMCKEN1, 0xffffffff);
+ mtl2(L2WACMCKEN2, 0xffffffff);
+ mtl2(L2WDFMCKEN, 0xffffffff);
+
+ /* L2 interrupts */
+ mtl2(L2PLBINTEN1, 0xffff0000);
+
+ /*
+ * At a global level, enable all L2 machine checks and interrupts
+ * reported by the L2 subsystems, except for the external machine check
+ * input (UIC0.1).
+ */
+ mtl2(L2MCKEN, 0x000007ff);
+ mtl2(L2INTEN, 0x000004ff);
+
+ /* Enable FSP-2 configuration logic parity errors */
+ mtdcr(DCRN_CONF_EIR_RS, 0x80000000);
return 1;
}
+static void __init fsp2_irq_init(void)
+{
+ uic_init_tree();
+ critical_irq_setup();
+}
+
define_machine(fsp2) {
.name = "FSP-2",
.probe = fsp2_probe,
.progress = udbg_progress,
- .init_IRQ = uic_init_tree,
+ .init_IRQ = fsp2_irq_init,
.get_irq = uic_get_irq,
.restart = ppc4xx_reset_system,
.calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h
new file mode 100644
index 000000000000..9e1d52754c8b
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.h
@@ -0,0 +1,272 @@
+#ifndef _ASM_POWERPC_FSP_DCR_H_
+#define _ASM_POWERPC_FSP_DCR_H_
+#ifdef __KERNEL__
+#include <asm/dcr.h>
+
+#define DCRN_CMU_ADDR 0x00C /* Chip management unic addr */
+#define DCRN_CMU_DATA 0x00D /* Chip management unic data */
+
+/* PLB4 Arbiter */
+#define DCRN_PLB4_PCBI 0x010 /* PLB Crossbar ID/Rev Register */
+#define DCRN_PLB4_P0ACR 0x011 /* PLB0 Arbiter Control Register */
+#define DCRN_PLB4_P0ESRL 0x012 /* PLB0 Error Status Register Low */
+#define DCRN_PLB4_P0ESRH 0x013 /* PLB0 Error Status Register High */
+#define DCRN_PLB4_P0EARL 0x014 /* PLB0 Error Address Register Low */
+#define DCRN_PLB4_P0EARH 0x015 /* PLB0 Error Address Register High */
+#define DCRN_PLB4_P0ESRLS 0x016 /* PLB0 Error Status Register Low Set*/
+#define DCRN_PLB4_P0ESRHS 0x017 /* PLB0 Error Status Register High */
+#define DCRN_PLB4_PCBC 0x018 /* PLB Crossbar Control Register */
+#define DCRN_PLB4_P1ACR 0x019 /* PLB1 Arbiter Control Register */
+#define DCRN_PLB4_P1ESRL 0x01A /* PLB1 Error Status Register Low */
+#define DCRN_PLB4_P1ESRH 0x01B /* PLB1 Error Status Register High */
+#define DCRN_PLB4_P1EARL 0x01C /* PLB1 Error Address Register Low */
+#define DCRN_PLB4_P1EARH 0x01D /* PLB1 Error Address Register High */
+#define DCRN_PLB4_P1ESRLS 0x01E /* PLB1 Error Status Register Low Set*/
+#define DCRN_PLB4_P1ESRHS 0x01F /*PLB1 Error Status Register High Set*/
+
+/* PLB4/OPB bridge 0, 1, 2, 3 */
+#define DCRN_PLB4OPB0_BASE 0x020
+#define DCRN_PLB4OPB1_BASE 0x030
+#define DCRN_PLB4OPB2_BASE 0x040
+#define DCRN_PLB4OPB3_BASE 0x050
+
+#define PLB4OPB_GESR0 0x0 /* Error status 0: Master Dev 0-3 */
+#define PLB4OPB_GEAR 0x2 /* Error Address Register */
+#define PLB4OPB_GEARU 0x3 /* Error Upper Address Register */
+#define PLB4OPB_GESR1 0x4 /* Error Status 1: Master Dev 4-7 */
+#define PLB4OPB_GESR2 0xC /* Error Status 2: Master Dev 8-11 */
+
+/* PLB4-to-AHB Bridge */
+#define DCRN_PLB4AHB_BASE 0x400
+#define DCRN_PLB4AHB_SEUAR (DCRN_PLB4AHB_BASE + 1)
+#define DCRN_PLB4AHB_SELAR (DCRN_PLB4AHB_BASE + 2)
+#define DCRN_PLB4AHB_ESR (DCRN_PLB4AHB_BASE + 3)
+#define DCRN_AHBPLB4_ESR (DCRN_PLB4AHB_BASE + 8)
+#define DCRN_AHBPLB4_EAR (DCRN_PLB4AHB_BASE + 9)
+
+/* PLB6 Controller */
+#define DCRN_PLB6_BASE 0x11111300
+#define DCRN_PLB6_CR0 (DCRN_PLB6_BASE)
+#define DCRN_PLB6_ERR (DCRN_PLB6_BASE + 0x0B)
+#define DCRN_PLB6_HD (DCRN_PLB6_BASE + 0x0E)
+#define DCRN_PLB6_SHD (DCRN_PLB6_BASE + 0x10)
+
+/* PLB4-to-PLB6 Bridge */
+#define DCRN_PLB4PLB6_BASE 0x11111320
+#define DCRN_PLB4PLB6_ESR (DCRN_PLB4PLB6_BASE + 1)
+#define DCRN_PLB4PLB6_EARH (DCRN_PLB4PLB6_BASE + 3)
+#define DCRN_PLB4PLB6_EARL (DCRN_PLB4PLB6_BASE + 4)
+
+/* PLB6-to-PLB4 Bridge */
+#define DCRN_PLB6PLB4_BASE 0x11111350
+#define DCRN_PLB6PLB4_ESR (DCRN_PLB6PLB4_BASE + 1)
+#define DCRN_PLB6PLB4_EARH (DCRN_PLB6PLB4_BASE + 3)
+#define DCRN_PLB6PLB4_EARL (DCRN_PLB6PLB4_BASE + 4)
+
+/* PLB6-to-MCIF Bridge */
+#define DCRN_PLB6MCIF_BASE 0x11111380
+#define DCRN_PLB6MCIF_BESR0 (DCRN_PLB6MCIF_BASE + 0)
+#define DCRN_PLB6MCIF_BESR1 (DCRN_PLB6MCIF_BASE + 1)
+#define DCRN_PLB6MCIF_BEARL (DCRN_PLB6MCIF_BASE + 2)
+#define DCRN_PLB6MCIF_BEARH (DCRN_PLB6MCIF_BASE + 3)
+
+/* Configuration Logic Registers */
+#define DCRN_CONF_BASE 0x11111400
+#define DCRN_CONF_FIR_RWC (DCRN_CONF_BASE + 0x3A)
+#define DCRN_CONF_EIR_RS (DCRN_CONF_BASE + 0x3E)
+#define DCRN_CONF_RPERR0 (DCRN_CONF_BASE + 0x4D)
+#define DCRN_CONF_RPERR1 (DCRN_CONF_BASE + 0x4E)
+
+#define DCRN_L2CDCRAI 0x11111100
+#define DCRN_L2CDCRDI 0x11111104
+/* L2 indirect addresses */
+#define L2MCK 0x120
+#define L2MCKEN 0x130
+#define L2INT 0x150
+#define L2INTEN 0x160
+#define L2LOG0 0x180
+#define L2LOG1 0x184
+#define L2LOG2 0x188
+#define L2LOG3 0x18C
+#define L2LOG4 0x190
+#define L2LOG5 0x194
+#define L2PLBSTAT0 0x300
+#define L2PLBSTAT1 0x304
+#define L2PLBMCKEN0 0x330
+#define L2PLBMCKEN1 0x334
+#define L2PLBINTEN0 0x360
+#define L2PLBINTEN1 0x364
+#define L2ARRSTAT0 0x500
+#define L2ARRSTAT1 0x504
+#define L2ARRSTAT2 0x508
+#define L2ARRMCKEN0 0x530
+#define L2ARRMCKEN1 0x534
+#define L2ARRMCKEN2 0x538
+#define L2ARRINTEN0 0x560
+#define L2ARRINTEN1 0x564
+#define L2ARRINTEN2 0x568
+#define L2CPUSTAT 0x700
+#define L2CPUMCKEN 0x730
+#define L2CPUINTEN 0x760
+#define L2RACSTAT0 0x900
+#define L2RACMCKEN0 0x930
+#define L2RACINTEN0 0x960
+#define L2WACSTAT0 0xD00
+#define L2WACSTAT1 0xD04
+#define L2WACSTAT2 0xD08
+#define L2WACMCKEN0 0xD30
+#define L2WACMCKEN1 0xD34
+#define L2WACMCKEN2 0xD38
+#define L2WACINTEN0 0xD60
+#define L2WACINTEN1 0xD64
+#define L2WACINTEN2 0xD68
+#define L2WDFSTAT 0xF00
+#define L2WDFMCKEN 0xF30
+#define L2WDFINTEN 0xF60
+
+/* DDR3/4 Memory Controller */
+#define DCRN_DDR34_BASE 0x11120000
+#define DCRN_DDR34_MCSTAT 0x10
+#define DCRN_DDR34_MCOPT1 0x20
+#define DCRN_DDR34_MCOPT2 0x21
+#define DCRN_DDR34_PHYSTAT 0x32
+#define DCRN_DDR34_CFGR0 0x40
+#define DCRN_DDR34_CFGR1 0x41
+#define DCRN_DDR34_CFGR2 0x42
+#define DCRN_DDR34_CFGR3 0x43
+#define DCRN_DDR34_SCRUB_CNTL 0xAA
+#define DCRN_DDR34_SCRUB_INT 0xAB
+#define DCRN_DDR34_SCRUB_START_ADDR 0xB0
+#define DCRN_DDR34_SCRUB_END_ADDR 0xD0
+#define DCRN_DDR34_ECCERR_ADDR_PORT0 0xE0
+#define DCRN_DDR34_ECCERR_ADDR_PORT1 0xE1
+#define DCRN_DDR34_ECCERR_ADDR_PORT2 0xE2
+#define DCRN_DDR34_ECCERR_ADDR_PORT3 0xE3
+#define DCRN_DDR34_ECCERR_COUNT_PORT0 0xE4
+#define DCRN_DDR34_ECCERR_COUNT_PORT1 0xE5
+#define DCRN_DDR34_ECCERR_COUNT_PORT2 0xE6
+#define DCRN_DDR34_ECCERR_COUNT_PORT3 0xE7
+#define DCRN_DDR34_ECCERR_PORT0 0xF0
+#define DCRN_DDR34_ECCERR_PORT1 0xF2
+#define DCRN_DDR34_ECCERR_PORT2 0xF4
+#define DCRN_DDR34_ECCERR_PORT3 0xF6
+#define DCRN_DDR34_ECC_CHECK_PORT0 0xF8
+#define DCRN_DDR34_ECC_CHECK_PORT1 0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT2 0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT3 0xFB
+
+#define DDR34_SCRUB_CNTL_STOP 0x00000000
+#define DDR34_SCRUB_CNTL_SCRUB 0x80000000
+#define DDR34_SCRUB_CNTL_UE_STOP 0x20000000
+#define DDR34_SCRUB_CNTL_CE_STOP 0x10000000
+#define DDR34_SCRUB_CNTL_RANK_EN 0x00008000
+
+/* PLB-Attached DDR3/4 Core Wrapper */
+#define DCRN_CW_BASE 0x11111800
+#define DCRN_CW_MCER0 0x00
+#define DCRN_CW_MCER1 0x01
+#define DCRN_CW_MCER_AND0 0x02
+#define DCRN_CW_MCER_AND1 0x03
+#define DCRN_CW_MCER_OR0 0x04
+#define DCRN_CW_MCER_OR1 0x05
+#define DCRN_CW_MCER_MASK0 0x06
+#define DCRN_CW_MCER_MASK1 0x07
+#define DCRN_CW_MCER_MASK_AND0 0x08
+#define DCRN_CW_MCER_MASK_AND1 0x09
+#define DCRN_CW_MCER_MASK_OR0 0x0A
+#define DCRN_CW_MCER_MASK_OR1 0x0B
+#define DCRN_CW_MCER_ACTION0 0x0C
+#define DCRN_CW_MCER_ACTION1 0x0D
+#define DCRN_CW_MCER_WOF0 0x0E
+#define DCRN_CW_MCER_WOF1 0x0F
+#define DCRN_CW_LFIR 0x10
+#define DCRN_CW_LFIR_AND 0x11
+#define DCRN_CW_LFIR_OR 0x12
+#define DCRN_CW_LFIR_MASK 0x13
+#define DCRN_CW_LFIR_MASK_AND 0x14
+#define DCRN_CW_LFIR_MASK_OR 0x15
+
+#define CW_MCER0_MEM_CE 0x00020000
+/* CMU addresses */
+#define CMUN_CRCS 0x00 /* Chip Reset Control/Status */
+#define CMUN_CONFFIR0 0x20 /* Config Reg Parity FIR 0 */
+#define CMUN_CONFFIR1 0x21 /* Config Reg Parity FIR 1 */
+#define CMUN_CONFFIR2 0x22 /* Config Reg Parity FIR 2 */
+#define CMUN_CONFFIR3 0x23 /* Config Reg Parity FIR 3 */
+#define CMUN_URCR3_RS 0x24 /* Unit Reset Control Reg 3 Set */
+#define CMUN_URCR3_C 0x25 /* Unit Reset Control Reg 3 Clear */
+#define CMUN_URCR3_P 0x26 /* Unit Reset Control Reg 3 Pulse */
+#define CMUN_PW0 0x2C /* Pulse Width Register */
+#define CMUN_URCR0_P 0x2D /* Unit Reset Control Reg 0 Pulse */
+#define CMUN_URCR1_P 0x2E /* Unit Reset Control Reg 1 Pulse */
+#define CMUN_URCR2_P 0x2F /* Unit Reset Control Reg 2 Pulse */
+#define CMUN_CLS_RW 0x30 /* Code Load Status (Read/Write) */
+#define CMUN_CLS_S 0x31 /* Code Load Status (Set) */
+#define CMUN_CLS_C 0x32 /* Code Load Status (Clear */
+#define CMUN_URCR2_RS 0x33 /* Unit Reset Control Reg 2 Set */
+#define CMUN_URCR2_C 0x34 /* Unit Reset Control Reg 2 Clear */
+#define CMUN_CLKEN0 0x35 /* Clock Enable 0 */
+#define CMUN_CLKEN1 0x36 /* Clock Enable 1 */
+#define CMUN_PCD0 0x37 /* PSI clock divider 0 */
+#define CMUN_PCD1 0x38 /* PSI clock divider 1 */
+#define CMUN_TMR0 0x39 /* Reset Timer */
+#define CMUN_TVS0 0x3A /* TV Sense Reg 0 */
+#define CMUN_TVS1 0x3B /* TV Sense Reg 1 */
+#define CMUN_MCCR 0x3C /* DRAM Configuration Reg */
+#define CMUN_FIR0 0x3D /* Fault Isolation Reg 0 */
+#define CMUN_FMR0 0x3E /* FIR Mask Reg 0 */
+#define CMUN_ETDRB 0x3F /* ETDR Backdoor */
+
+/* CRCS bit fields */
+#define CRCS_STAT_MASK 0xF0000000
+#define CRCS_STAT_POR 0x10000000
+#define CRCS_STAT_PHR 0x20000000
+#define CRCS_STAT_PCIE 0x30000000
+#define CRCS_STAT_CRCS_SYS 0x40000000
+#define CRCS_STAT_DBCR_SYS 0x50000000
+#define CRCS_STAT_HOST_SYS 0x60000000
+#define CRCS_STAT_CHIP_RST_B 0x70000000
+#define CRCS_STAT_CRCS_CHIP 0x80000000
+#define CRCS_STAT_DBCR_CHIP 0x90000000
+#define CRCS_STAT_HOST_CHIP 0xA0000000
+#define CRCS_STAT_PSI_CHIP 0xB0000000
+#define CRCS_STAT_CRCS_CORE 0xC0000000
+#define CRCS_STAT_DBCR_CORE 0xD0000000
+#define CRCS_STAT_HOST_CORE 0xE0000000
+#define CRCS_STAT_PCIE_HOT 0xF0000000
+#define CRCS_STAT_SELF_CORE 0x40000000
+#define CRCS_STAT_SELF_CHIP 0x50000000
+#define CRCS_WATCHE 0x08000000
+#define CRCS_CORE 0x04000000 /* Reset PPC440 core */
+#define CRCS_CHIP 0x02000000 /* Chip Reset */
+#define CRCS_SYS 0x01000000 /* System Reset */
+#define CRCS_WRCR 0x00800000 /* Watchdog reset on core reset */
+#define CRCS_EXTCR 0x00080000 /* CHIP_RST_B triggers chip reset */
+#define CRCS_PLOCK 0x00000002 /* PLL Locked */
+
+#define mtcmu(reg, data) \
+do { \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ mtdcr(DCRN_CMU_DATA, data); \
+} while (0)
+
+#define mfcmu(reg)\
+ ({u32 data; \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ data = mfdcr(DCRN_CMU_DATA); \
+ data; })
+
+#define mtl2(reg, data) \
+do { \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ mtdcr(DCRN_L2CDCRDI, data); \
+} while (0)
+
+#define mfl2(reg) \
+ ({u32 data; \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ data = mfdcr(DCRN_L2CDCRDI); \
+ data; })
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_FSP2_DCR_H_ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index f99e79ee060e..48abb4cb304c 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -387,8 +387,8 @@ static unsigned int __init get_fifo_size(struct device_node *np,
if (fp)
return *fp;
- pr_warning("no %s property in %pOF node, defaulting to %d\n",
- prop_name, np, DEFAULT_FIFO_SIZE);
+ pr_warn("no %s property in %pOF node, defaulting to %d\n",
+ prop_name, np, DEFAULT_FIFO_SIZE);
return DEFAULT_FIFO_SIZE;
}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 9e974b1e1697..17cf249b18ee 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -90,7 +90,7 @@ struct mpc52xx_gpt_priv {
struct list_head list; /* List of all GPT devices */
struct device *dev;
struct mpc52xx_gpt __iomem *regs;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct irq_domain *irqhost;
u32 ipb_freq;
u8 wdt_mode;
@@ -141,9 +141,9 @@ static void mpc52xx_gpt_irq_unmask(struct irq_data *d)
struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static void mpc52xx_gpt_irq_mask(struct irq_data *d)
@@ -151,9 +151,9 @@ static void mpc52xx_gpt_irq_mask(struct irq_data *d)
struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static void mpc52xx_gpt_irq_ack(struct irq_data *d)
@@ -171,14 +171,14 @@ static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type)
dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type);
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK;
if (flow_type & IRQF_TRIGGER_RISING)
reg |= MPC52xx_GPT_MODE_ICT_RISING;
if (flow_type & IRQF_TRIGGER_FALLING)
reg |= MPC52xx_GPT_MODE_ICT_FALLING;
out_be32(&gpt->regs->mode, reg);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -264,11 +264,11 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
/* If the GPT is currently disabled, then change it to be in Input
* Capture mode. If the mode is non-zero, then the pin could be
* already in use for something. */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
mode = in_be32(&gpt->regs->mode);
if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0)
out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq);
}
@@ -295,9 +295,9 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v);
r = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
@@ -307,9 +307,9 @@ static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio);
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -436,16 +436,16 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
}
/* Set and enable the timer, reject an attempt to use a wdt as gpt */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
if (as_wdt)
gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return -EBUSY;
}
out_be32(&gpt->regs->count, prescale << 16 | clocks);
clrsetbits_be32(&gpt->regs->mode, clear, set);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -476,14 +476,14 @@ int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
unsigned long flags;
/* reject the operation if the timer is used as watchdog (gpt 0 only) */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return -EBUSY;
}
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
EXPORT_SYMBOL(mpc52xx_gpt_stop_timer);
@@ -500,9 +500,9 @@ u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt)
u64 prescale;
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
period = in_be32(&gpt->regs->count);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
prescale = period >> 16;
period &= 0xffff;
@@ -532,9 +532,9 @@ static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt)
{
unsigned long flags;
- spin_lock_irqsave(&gpt_wdt->lock, flags);
+ raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING);
- spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
}
/* wdt misc device api */
@@ -638,11 +638,11 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
unsigned long flags;
- spin_lock_irqsave(&gpt_wdt->lock, flags);
+ raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
clrbits32(&gpt_wdt->regs->mode,
MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN);
gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
- spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
#endif
clear_bit(0, &wdt_is_active);
return 0;
@@ -723,7 +723,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
if (!gpt)
return -ENOMEM;
- spin_lock_init(&gpt->lock);
+ raw_spin_lock_init(&gpt->lock);
gpt->dev = &ofdev->dev;
gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node);
gpt->regs = of_iomap(ofdev->dev.of_node, 0);
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 96bb55ca61d3..d2ef39f0edc8 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -84,7 +84,7 @@ static ssize_t show_status(struct device *d,
return sprintf(buf, "%02x\n", ret);
}
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+static DEVICE_ATTR(status, 0444, show_status, NULL);
static void mcu_power_off(void)
{
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index bb7b25acf26f..74c154e67c8b 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -75,7 +75,7 @@ static void __init mpc832x_sys_setup_arch(void)
par_io_init(np);
of_node_put(np);
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+ for_each_node_by_name(np, "ucc")
par_io_of_config(np);
}
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index a4539c5accb0..438986593873 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -204,7 +204,7 @@ static void __init mpc832x_rdb_setup_arch(void)
par_io_init(np);
of_node_put(np);
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+ for_each_node_by_name(np, "ucc")
par_io_of_config(np);
}
#endif /* CONFIG_QUICC_ENGINE */
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 4fc3051c2b2e..fd44dd03e1f3 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -83,7 +83,7 @@ static void __init mpc836x_mds_setup_arch(void)
par_io_init(np);
of_node_put(np);
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+ for_each_node_by_name(np, "ucc")
par_io_of_config(np);
#ifdef CONFIG_QE_USB
/* Must fixup Par IO before QE GPIO chips are registered. */
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 82f8490b5aa7..38d4ba9f37b5 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -252,8 +252,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
/* type is configurable */
if (intspec[1] != IRQ_TYPE_LEVEL_LOW &&
intspec[1] != IRQ_TYPE_LEVEL_HIGH) {
- pr_warning("FPGA PIC: invalid irq type, "
- "setting default active low\n");
+ pr_warn("FPGA PIC: invalid irq type, setting default active low\n");
*out_flags = IRQ_TYPE_LEVEL_LOW;
} else {
*out_flags = intspec[1];
@@ -267,7 +266,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
if (intspec[2] <= 2)
fpga_irq->irq_line = intspec[2];
else
- pr_warning("FPGA PIC: invalid irq routing\n");
+ pr_warn("FPGA PIC: invalid irq routing\n");
return 0;
}
@@ -293,7 +292,7 @@ void socrates_fpga_pic_init(struct device_node *pic)
for (i = 0; i < 3; i++) {
socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i);
if (!socrates_fpga_irqs[i]) {
- pr_warning("FPGA PIC: can't get irq%d.\n", i);
+ pr_warn("FPGA PIC: can't get irq%d\n", i);
continue;
}
irq_set_chained_handler(socrates_fpga_irqs[i],
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index a0e989ed4b6f..17c6cd3d02e6 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -101,7 +101,7 @@ static int __init mpc86xx_hpcn_probe(void)
/* Be nice and don't give silent boot death. Delete this in 2.6.27 */
if (of_machine_is_compatible("mpc86xx")) {
- pr_warning("WARNING: your dts/dtb is old. You must update before the next kernel release\n");
+ pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n");
return 1;
}
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index e2089d3de00c..d408162d5af4 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -116,18 +116,6 @@ config 8xx_GPIO
If in doubt, say Y here.
-config 8xx_CPU6
- bool "CPU6 Silicon Errata (860 Pre Rev. C)"
- help
- MPC860 CPUs, prior to Rev C have some bugs in the silicon, which
- require workarounds for Linux (and most other OSes to work). If you
- get a BUG() very early in boot, this might fix the problem. For
- more details read the document entitled "MPC860 Family Device Errata
- Reference" on Freescale's website. This option also incurs a
- performance hit.
-
- If in doubt, say N here.
-
config 8xx_CPU15
bool "CPU15 Silicon Errata"
depends on !HUGETLB_PAGE
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 5a96a2763e4a..14ef17e10ec9 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -293,17 +293,6 @@ config CPM2
you wish to build a kernel for a machine with a CPM2 coprocessor
on it (826x, 827x, 8560).
-config AXON_RAM
- tristate "Axon DDR2 memory device driver"
- depends on PPC_IBM_CELL_BLADE && BLOCK
- select DAX
- default m
- help
- It registers one block device per Axon's DDR2 memory bank found
- on a system. Block devices are called axonram?, their major and
- minor numbers are available in /proc/devices, /proc/partitions or
- in /sys/block/axonram?/dev.
-
config FSL_ULI1575
bool
default n
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index ae07470fde3c..a429d859f15d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -33,7 +33,6 @@ config PPC_85xx
config PPC_8xx
bool "Freescale 8xx"
select FSL_SOC
- select PPC_LIB_RHEAP
select SYS_SUPPORTS_HUGETLBFS
config 40x
@@ -168,13 +167,6 @@ config PPC_FPU
bool
default y if PPC64
-config PPC_8xx_PERF_EVENT
- bool "PPC 8xx perf events"
- depends on PPC_8xx && PERF_EVENTS
- help
- This is Performance Events support for PPC 8xx. The 8xx doesn't
- have a PMU but some events are emulated using 8xx features.
-
config FSL_EMB_PERFMON
bool "Freescale Embedded Perfmon"
depends on E500 || PPC_83xx
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 6fc85e29dc08..5d4bf9aed51a 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -315,8 +315,7 @@ static int __init setup_iic(void)
struct cbe_iic_regs __iomem *node_iic;
const u32 *np;
- for (dn = NULL;
- (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
+ for_each_node_by_name(dn, "interrupt-controller") {
if (!of_device_is_compatible(dn,
"IBM,CBEA-Internal-Interrupt-Controller"))
continue;
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index d3543e68efe8..7d31b8d14661 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -192,8 +192,7 @@ static void __init mpic_init_IRQ(void)
struct device_node *dn;
struct mpic *mpic;
- for (dn = NULL;
- (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+ for_each_node_by_name(dn, "interrupt-controller") {
if (!of_device_is_compatible(dn, "CBEA,platform-open-pic"))
continue;
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index aa44bfc46467..c137f0cb4151 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -343,8 +343,7 @@ void __init spider_init_IRQ(void)
* device-tree is bogus anyway) so all we can do is pray or maybe test
* the address and deduce the node-id
*/
- for (dn = NULL;
- (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+ for_each_node_by_name(dn, "interrupt-controller") {
if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
if (of_address_to_resource(dn, 0, &r)) {
printk(KERN_WARNING "spider-pic: Failed\n");
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index f636ee22b203..5c409c98cca8 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -292,12 +292,12 @@ static int __init of_enumerate_spus(int (*fn)(void *data))
unsigned int n = 0;
ret = -ENODEV;
- for (node = of_find_node_by_type(NULL, "spe");
- node; node = of_find_node_by_type(node, "spe")) {
+ for_each_node_by_type(node, "spe") {
ret = fn(node);
if (ret) {
printk(KERN_WARNING "%s: Error initializing %s\n",
__func__, node->name);
+ of_node_put(node);
break;
}
n++;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index fc7772c3d068..c1be486da899 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -2375,8 +2375,8 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE;
- return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n",
- (unsigned int) p->tstamp.tv_sec,
+ return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n",
+ (unsigned long long) p->tstamp.tv_sec,
(unsigned int) p->tstamp.tv_nsec,
p->spu_id,
(unsigned int) p->type,
@@ -2499,7 +2499,7 @@ void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
struct switch_log_entry *p;
p = ctx->switch_log->log + ctx->switch_log->head;
- ktime_get_ts(&p->tstamp);
+ ktime_get_ts64(&p->tstamp);
p->timebase = get_tb();
p->spu_id = spu ? spu->number : -1;
p->type = type;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 9558d725a99b..db329d4bf1c3 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -455,7 +455,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
}
}
- ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
+ ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
if (ret)
goto out_aff_unlock;
@@ -546,7 +546,7 @@ static int spufs_create_gang(struct inode *inode,
struct path path = {.mnt = mnt, .dentry = dentry};
int ret;
- ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
+ ret = spufs_mkgang(inode, dentry, mode & 0777);
if (!ret) {
ret = spufs_gang_open(&path);
if (ret < 0) {
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 2d0479ad3af4..b5fc1b3fe538 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -69,7 +69,7 @@ struct switch_log {
unsigned long head;
unsigned long tail;
struct switch_log_entry {
- struct timespec tstamp;
+ struct timespec64 tstamp;
s32 spu_id;
u32 type;
u32 val;
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index aafa01ba062f..2c72263ad6ab 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -589,7 +589,7 @@ int pasemi_dma_init(void)
pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0);
while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) {
if (time_after(jiffies, timeout)) {
- pr_warning("Warning: Could not disable RX section\n");
+ pr_warn("Warning: Could not disable RX section\n");
break;
}
}
@@ -598,7 +598,7 @@ int pasemi_dma_init(void)
pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0);
while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) {
if (time_after(jiffies, timeout)) {
- pr_warning("Warning: Could not disable TX section\n");
+ pr_warn("Warning: Could not disable TX section\n");
break;
}
}
diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c
index a00096b1c713..6b5dcccae1d3 100644
--- a/arch/powerpc/platforms/powermac/backlight.c
+++ b/arch/powerpc/platforms/powermac/backlight.c
@@ -186,7 +186,7 @@ int pmac_backlight_set_legacy_brightness(int brightness)
return __pmac_backlight_set_legacy_brightness(brightness);
}
-int pmac_backlight_get_legacy_brightness()
+int pmac_backlight_get_legacy_brightness(void)
{
int result = -ENXIO;
@@ -205,12 +205,12 @@ int pmac_backlight_get_legacy_brightness()
return result;
}
-void pmac_backlight_disable()
+void pmac_backlight_disable(void)
{
atomic_inc(&kernel_backlight_disabled);
}
-void pmac_backlight_enable()
+void pmac_backlight_enable(void)
{
atomic_dec(&kernel_backlight_disabled);
}
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 9e3f39d36e88..466b84234683 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2641,7 +2641,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
phys_addr_t addr;
u64 size;
- for (node = NULL; (node = of_find_node_by_name(node, name)) != NULL;) {
+ for_each_node_by_name(node, name) {
if (!compat)
break;
if (of_device_is_compatible(node, compat))
@@ -2853,7 +2853,6 @@ set_initial_features(void)
}
/* Enable ATA-100 before PCI probe. */
- np = of_find_node_by_name(NULL, "ata-6");
for_each_node_by_name(np, "ata-6") {
if (np->parent
&& of_device_is_compatible(np->parent, "uni-north")
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 5e0719b27294..57bbff465964 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -486,15 +486,16 @@ static int __init pmac_pic_probe_mpic(void)
struct device_node *np, *master = NULL, *slave = NULL;
/* We can have up to 2 MPICs cascaded */
- for (np = NULL; (np = of_find_node_by_type(np, "open-pic"))
- != NULL;) {
+ for_each_node_by_type(np, "open-pic") {
if (master == NULL &&
of_get_property(np, "interrupts", NULL) == NULL)
master = of_node_get(np);
else if (slave == NULL)
slave = of_node_get(np);
- if (master && slave)
+ if (master && slave) {
+ of_node_put(np);
break;
+ }
}
/* Check for bogus setups */
@@ -604,6 +605,7 @@ static int pmacpic_find_viaint(void)
if (np == NULL)
goto not_found;
viaint = irq_of_parse_and_map(np, 0);
+ of_node_put(np);
not_found:
#endif /* CONFIG_ADB_PMU */
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 2cd99eb30762..95275e0e2efa 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -774,8 +774,8 @@ static void __init smp_core99_probe(void)
if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345);
/* Count CPUs in the device-tree */
- for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;)
- ++ncpus;
+ for_each_node_by_type(cpus, "cpu")
+ ++ncpus;
printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus);
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 3732118a0482..6c9d5199a7e2 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
obj-$(CONFIG_PPC_FTW) += nx-ftw.o
+obj-$(CONFIG_OCXL_BASE) += ocxl.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4650fb294e7a..33c86c1a1720 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -43,6 +43,22 @@
static int eeh_event_irq = -EINVAL;
+void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /*
+ * The following operations will fail if VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ eeh_sysfs_add_device(pdev);
+}
+
static int pnv_eeh_init(void)
{
struct pci_controller *hose;
@@ -86,6 +102,7 @@ static int pnv_eeh_init(void)
}
eeh_set_pe_aux_size(max_diag_size);
+ ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
return 0;
}
@@ -1638,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
return ret;
}
-static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
-{
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
- u32 devctl, cmd, cap2, aer_capctl;
- int old_mps;
-
- if (edev->pcie_cap) {
- /* Restore MPS */
- old_mps = (ffs(pdn->mps) - 8) << 5;
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
- devctl |= old_mps;
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
-
- /* Disable Completion Timeout */
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
- 4, &cap2);
- if (cap2 & 0x10) {
- eeh_ops->read_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, &cap2);
- cap2 |= 0x10;
- eeh_ops->write_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, cap2);
- }
- }
-
- /* Enable SERR and parity checking */
- eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
- cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
- eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
-
- /* Enable report various errors */
- if (edev->pcie_cap) {
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_CERE;
- devctl |= (PCI_EXP_DEVCTL_NFERE |
- PCI_EXP_DEVCTL_FERE |
- PCI_EXP_DEVCTL_URRE);
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
- }
-
- /* Enable ECRC generation and check */
- if (edev->pcie_cap && edev->aer_cap) {
- eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, &aer_capctl);
- aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
- eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, aer_capctl);
- }
-
- return 0;
-}
-
static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
struct pnv_phb *phb;
- s64 ret;
+ s64 ret = 0;
int config_addr = (pdn->busno << 8) | (pdn->devfn);
if (!edev)
@@ -1715,7 +1673,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
* to be exported by firmware in extendible way.
*/
if (edev->physfn) {
- ret = pnv_eeh_restore_vf_config(pdn);
+ ret = eeh_restore_vf_config(pdn);
} else {
phb = pdn->phb->private_data;
ret = opal_pci_reinit(phb->opal_id,
@@ -1728,7 +1686,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
return -EIO;
}
- return 0;
+ return ret;
}
static struct eeh_ops pnv_eeh_ops = {
@@ -1746,25 +1704,10 @@ static struct eeh_ops pnv_eeh_ops = {
.read_config = pnv_eeh_read_config,
.write_config = pnv_eeh_write_config,
.next_error = pnv_eeh_next_error,
- .restore_config = pnv_eeh_restore_config
+ .restore_config = pnv_eeh_restore_config,
+ .notify_resume = NULL
};
-void pcibios_bus_add_device(struct pci_dev *pdev)
-{
- struct pci_dn *pdn = pci_get_pdn(pdev);
-
- if (!pdev->is_virtfn)
- return;
-
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
- eeh_add_device_early(pdn);
- eeh_add_device_late(pdev);
- eeh_sysfs_add_device(pdev);
-}
-
#ifdef CONFIG_PCI_IOV
static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
{
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index f6cbc1a71472..0a253b64ac5f 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -39,7 +39,10 @@
*/
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
- return PCI_DN(dn)->pcidev;
+ struct pci_dn *pdn = PCI_DN(dn);
+
+ return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
+ pdn->busno, pdn->devfn);
}
/* Given a NPU device get the associated PCI device. */
@@ -277,7 +280,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
int64_t rc = 0;
phys_addr_t top = memblock_end_of_DRAM();
- if (phb->type != PNV_PHB_NPU || !npe->pdev)
+ if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
return -EINVAL;
rc = pnv_npu_unset_window(npe, 0);
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
new file mode 100644
index 000000000000..fa9b53af3c7b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <asm/pnv-ocxl.h>
+#include <asm/opal.h>
+#include <asm/xive.h>
+#include <misc/ocxl-config.h>
+#include "pci.h"
+
+#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full
+#define PNV_OCXL_ACTAG_MAX 64
+/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
+#define PNV_OCXL_PASID_BITS 15
+#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1)
+
+#define AFU_PRESENT (1 << 31)
+#define AFU_INDEX_MASK 0x3F000000
+#define AFU_INDEX_SHIFT 24
+#define ACTAG_MASK 0xFFF
+
+
+struct actag_range {
+ u16 start;
+ u16 count;
+};
+
+struct npu_link {
+ struct list_head list;
+ int domain;
+ int bus;
+ int dev;
+ u16 fn_desired_actags[8];
+ struct actag_range fn_actags[8];
+ bool assignment_done;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+
+/*
+ * opencapi actags handling:
+ *
+ * When sending commands, the opencapi device references the memory
+ * context it's targeting with an 'actag', which is really an alias
+ * for a (BDF, pasid) combination. When it receives a command, the NPU
+ * must do a lookup of the actag to identify the memory context. The
+ * hardware supports a finite number of actags per link (64 for
+ * POWER9).
+ *
+ * The device can carry multiple functions, and each function can have
+ * multiple AFUs. Each AFU advertises in its config space the number
+ * of desired actags. The host must configure in the config space of
+ * the AFU how many actags the AFU is really allowed to use (which can
+ * be less than what the AFU desires).
+ *
+ * When a PCI function is probed by the driver, it has no visibility
+ * about the other PCI functions and how many actags they'd like,
+ * which makes it impossible to distribute actags fairly among AFUs.
+ *
+ * Unfortunately, the only way to know how many actags a function
+ * desires is by looking at the data for each AFU in the config space
+ * and add them up. Similarly, the only way to know how many actags
+ * all the functions of the physical device desire is by adding the
+ * previously computed function counts. Then we can match that against
+ * what the hardware supports.
+ *
+ * To get a comprehensive view, we use a 'pci fixup': at the end of
+ * PCI enumeration, each function counts how many actags its AFUs
+ * desire and we save it in a 'npu_link' structure, shared between all
+ * the PCI functions of a same device. Therefore, when the first
+ * function is probed by the driver, we can get an idea of the total
+ * count of desired actags for the device, and assign the actags to
+ * the AFUs, by pro-rating if needed.
+ */
+
+static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
+{
+ int vsec = pos;
+ u16 vendor, id;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ OCXL_EXT_CAP_ID_DVSEC))) {
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+ &vendor);
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+ if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+ return vsec;
+ }
+ return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+ int vsec = 0;
+ u8 idx;
+
+ while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
+ vsec))) {
+ pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+ &idx);
+ if (idx == afu_idx)
+ return vsec;
+ }
+ return 0;
+}
+
+static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
+{
+ int pos;
+ u32 val;
+
+ pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
+ if (!pos)
+ return -ESRCH;
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+ if (val & AFU_PRESENT)
+ *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
+ else
+ *afu_idx = -1;
+ return 0;
+}
+
+static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
+{
+ int pos;
+ u16 actag_sup;
+
+ pos = find_dvsec_afu_ctrl(dev, afu_idx);
+ if (!pos)
+ return -ESRCH;
+
+ pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
+ &actag_sup);
+ *actag = actag_sup & ACTAG_MASK;
+ return 0;
+}
+
+static struct npu_link *find_link(struct pci_dev *dev)
+{
+ struct npu_link *link;
+
+ list_for_each_entry(link, &links_list, list) {
+ /* The functions of a device all share the same link */
+ if (link->domain == pci_domain_nr(dev->bus) &&
+ link->bus == dev->bus->number &&
+ link->dev == PCI_SLOT(dev->devfn)) {
+ return link;
+ }
+ }
+
+ /* link doesn't exist yet. Allocate one */
+ link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
+ if (!link)
+ return NULL;
+ link->domain = pci_domain_nr(dev->bus);
+ link->bus = dev->bus->number;
+ link->dev = PCI_SLOT(dev->devfn);
+ list_add(&link->list, &links_list);
+ return link;
+}
+
+static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct npu_link *link;
+ int rc, afu_idx = -1, i, actag;
+
+ if (!machine_is(powernv))
+ return;
+
+ if (phb->type != PNV_PHB_NPU_OCAPI)
+ return;
+
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_warn(&dev->dev, "couldn't update actag information\n");
+ mutex_unlock(&links_list_lock);
+ return;
+ }
+
+ /*
+ * Check how many actags are desired for the AFUs under that
+ * function and add it to the count for the link
+ */
+ rc = get_max_afu_index(dev, &afu_idx);
+ if (rc) {
+ /* Most likely an invalid config space */
+ dev_dbg(&dev->dev, "couldn't find AFU information\n");
+ afu_idx = -1;
+ }
+
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
+ for (i = 0; i <= afu_idx; i++) {
+ /*
+ * AFU index 'holes' are allowed. So don't fail if we
+ * can't read the actag info for an index
+ */
+ rc = get_actag_count(dev, i, &actag);
+ if (rc)
+ continue;
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
+ }
+ dev_dbg(&dev->dev, "total actags for function: %d\n",
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
+
+ mutex_unlock(&links_list_lock);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
+
+static u16 assign_fn_actags(u16 desired, u16 total)
+{
+ u16 count;
+
+ if (total <= PNV_OCXL_ACTAG_MAX)
+ count = desired;
+ else
+ count = PNV_OCXL_ACTAG_MAX * desired / total;
+
+ return count;
+}
+
+static void assign_actags(struct npu_link *link)
+{
+ u16 actag_count, range_start = 0, total_desired = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ total_desired += link->fn_desired_actags[i];
+
+ for (i = 0; i < 8; i++) {
+ if (link->fn_desired_actags[i]) {
+ actag_count = assign_fn_actags(
+ link->fn_desired_actags[i],
+ total_desired);
+ link->fn_actags[i].start = range_start;
+ link->fn_actags[i].count = actag_count;
+ range_start += actag_count;
+ WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
+ }
+ pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
+ link->domain, link->bus, link->dev, i,
+ link->fn_actags[i].start, link->fn_actags[i].count,
+ link->fn_desired_actags[i]);
+ }
+ link->assignment_done = true;
+}
+
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+ u16 *supported)
+{
+ struct npu_link *link;
+
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_err(&dev->dev, "actag information not found\n");
+ mutex_unlock(&links_list_lock);
+ return -ENODEV;
+ }
+ /*
+ * On p9, we only have 64 actags per link, so they must be
+ * shared by all the functions of the same adapter. We counted
+ * the desired actag counts during PCI enumeration, so that we
+ * can allocate a pro-rated number of actags to each function.
+ */
+ if (!link->assignment_done)
+ assign_actags(link);
+
+ *base = link->fn_actags[PCI_FUNC(dev->devfn)].start;
+ *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count;
+ *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
+
+ mutex_unlock(&links_list_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
+
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
+{
+ struct npu_link *link;
+ int i, rc = -EINVAL;
+
+ /*
+ * The number of PASIDs (process address space ID) which can
+ * be used by a function depends on how many functions exist
+ * on the device. The NPU needs to be configured to know how
+ * many bits are available to PASIDs and how many are to be
+ * used by the function BDF indentifier.
+ *
+ * We only support one AFU-carrying function for now.
+ */
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_err(&dev->dev, "actag information not found\n");
+ mutex_unlock(&links_list_lock);
+ return -ENODEV;
+ }
+
+ for (i = 0; i < 8; i++)
+ if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
+ *count = PNV_OCXL_PASID_MAX;
+ rc = 0;
+ break;
+ }
+
+ mutex_unlock(&links_list_lock);
+ dev_dbg(&dev->dev, "%d PASIDs available for function\n",
+ rc ? 0 : *count);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
+
+static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
+{
+ int shift, idx;
+
+ WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
+ idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
+ shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
+ buf[idx] |= rate << shift;
+}
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+ char *rate_buf, int rate_buf_size)
+{
+ if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+ return -EINVAL;
+ /*
+ * The TL capabilities are a characteristic of the NPU, so
+ * we go with hard-coded values.
+ *
+ * The receiving rate of each template is encoded on 4 bits.
+ *
+ * On P9:
+ * - templates 0 -> 3 are supported
+ * - templates 0, 1 and 3 have a 0 receiving rate
+ * - template 2 has receiving rate of 1 (extra cycle)
+ */
+ memset(rate_buf, 0, rate_buf_size);
+ set_templ_rate(2, 1, rate_buf);
+ *cap = PNV_OCXL_TL_P9_RECV_CAP;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
+
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+ uint64_t rate_buf_phys, int rate_buf_size)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ int rc;
+
+ if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+ return -EINVAL;
+
+ rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
+ rate_buf_phys, rate_buf_size);
+ if (rc) {
+ dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
+ return -EINVAL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
+{
+ int rc;
+
+ rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Can't get translation interrupt for device\n");
+ return rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
+
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+ void __iomem *tfc, void __iomem *pe_handle)
+{
+ iounmap(dsisr);
+ iounmap(dar);
+ iounmap(tfc);
+ iounmap(pe_handle);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
+
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+ void __iomem **dar, void __iomem **tfc,
+ void __iomem **pe_handle)
+{
+ u64 reg;
+ int i, j, rc = 0;
+ void __iomem *regs[4];
+
+ /*
+ * opal stores the mmio addresses of the DSISR, DAR, TFC and
+ * PE_HANDLE registers in a device tree property, in that
+ * order
+ */
+ for (i = 0; i < 4; i++) {
+ rc = of_property_read_u64_index(dev->dev.of_node,
+ "ibm,opal-xsl-mmio", i, &reg);
+ if (rc)
+ break;
+ regs[i] = ioremap(reg, 8);
+ if (!regs[i]) {
+ rc = -EINVAL;
+ break;
+ }
+ }
+ if (rc) {
+ dev_err(&dev->dev, "Can't map translation mmio registers\n");
+ for (j = i - 1; j >= 0; j--)
+ iounmap(regs[j]);
+ } else {
+ *dsisr = regs[0];
+ *dar = regs[1];
+ *tfc = regs[2];
+ *pe_handle = regs[3];
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
+
+struct spa_data {
+ u64 phb_opal_id;
+ u32 bdfn;
+};
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+ void **platform_data)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct spa_data *data;
+ u32 bdfn;
+ int rc;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ bdfn = (dev->bus->number << 8) | dev->devfn;
+ rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
+ PE_mask);
+ if (rc) {
+ dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
+ kfree(data);
+ return rc;
+ }
+ data->phb_opal_id = phb->opal_id;
+ data->bdfn = bdfn;
+ *platform_data = (void *) data;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
+
+void pnv_ocxl_spa_release(void *platform_data)
+{
+ struct spa_data *data = (struct spa_data *) platform_data;
+ int rc;
+
+ rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
+ WARN_ON(rc);
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
+
+int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle)
+{
+ struct spa_data *data = (struct spa_data *) platform_data;
+ int rc;
+
+ rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe);
+
+int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
+{
+ __be64 flags, trigger_page;
+ s64 rc;
+ u32 hwirq;
+
+ hwirq = xive_native_alloc_irq();
+ if (!hwirq)
+ return -ENOENT;
+
+ rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
+ NULL);
+ if (rc || !trigger_page) {
+ xive_native_free_irq(hwirq);
+ return -ENOENT;
+ }
+ *irq = hwirq;
+ *trigger_addr = be64_to_cpu(trigger_page);
+ return 0;
+
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
+
+void pnv_ocxl_free_xive_irq(u32 irq)
+{
+ xive_native_free_irq(irq);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 4c827826c05e..0dc8fa4e0af2 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -103,9 +103,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj,
* due to the dynamic size of the dump
*/
static struct dump_attribute id_attribute =
- __ATTR(id, S_IRUGO, dump_id_show, NULL);
+ __ATTR(id, 0444, dump_id_show, NULL);
static struct dump_attribute type_attribute =
- __ATTR(type, S_IRUGO, dump_type_show, NULL);
+ __ATTR(type, 0444, dump_type_show, NULL);
static struct dump_attribute ack_attribute =
__ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index ecd6d9177d13..ba6e437abb4b 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -83,9 +83,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj,
}
static struct elog_attribute id_attribute =
- __ATTR(id, S_IRUGO, elog_id_show, NULL);
+ __ATTR(id, 0444, elog_id_show, NULL);
static struct elog_attribute type_attribute =
- __ATTR(type, S_IRUGO, elog_type_show, NULL);
+ __ATTR(type, 0444, elog_type_show, NULL);
static struct elog_attribute ack_attribute =
__ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 465ea105b771..dd4c9b8b8a81 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -21,6 +21,78 @@
#include <asm/io.h>
#include <asm/imc-pmu.h>
#include <asm/cputhreads.h>
+#include <asm/debugfs.h>
+
+static struct dentry *imc_debugfs_parent;
+
+/* Helpers to export imc command and mode via debugfs */
+static int imc_mem_get(void *data, u64 *val)
+{
+ *val = cpu_to_be64(*(u64 *)data);
+ return 0;
+}
+
+static int imc_mem_set(void *data, u64 val)
+{
+ *(u64 *)data = cpu_to_be64(val);
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n");
+
+static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value)
+{
+ return debugfs_create_file_unsafe(name, mode, parent,
+ value, &fops_imc_x64);
+}
+
+/*
+ * export_imc_mode_and_cmd: Create a debugfs interface
+ * for imc_cmd and imc_mode
+ * for each node in the system.
+ * imc_mode and imc_cmd can be changed by echo into
+ * this interface.
+ */
+static void export_imc_mode_and_cmd(struct device_node *node,
+ struct imc_pmu *pmu_ptr)
+{
+ static u64 loc, *imc_mode_addr, *imc_cmd_addr;
+ int chip = 0, nid;
+ char mode[16], cmd[16];
+ u32 cb_offset;
+
+ imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
+
+ /*
+ * Return here, either because 'imc' directory already exists,
+ * Or failed to create a new one.
+ */
+ if (!imc_debugfs_parent)
+ return;
+
+ if (of_property_read_u32(node, "cb_offset", &cb_offset))
+ cb_offset = IMC_CNTL_BLK_OFFSET;
+
+ for_each_node(nid) {
+ loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset;
+ imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
+ sprintf(mode, "imc_mode_%d", nid);
+ if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
+ imc_mode_addr))
+ goto err;
+
+ imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
+ sprintf(cmd, "imc_cmd_%d", nid);
+ if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
+ imc_cmd_addr))
+ goto err;
+ chip++;
+ }
+ return;
+
+err:
+ debugfs_remove_recursive(imc_debugfs_parent);
+}
/*
* imc_get_mem_addr_nest: Function to get nest counter memory region
@@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
}
pmu_ptr->imc_counter_mmaped = true;
+ export_imc_mode_and_cmd(node, pmu_ptr);
kfree(base_addr_arr);
kfree(chipid_arr);
return 0;
@@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
}
}
+ /* If none of the nest units are registered, remove debugfs interface */
+ if (pmu_count == 0)
+ debugfs_remove_recursive(imc_debugfs_parent);
+
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 23fb6647dced..6fd4092798d5 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -260,13 +260,13 @@ void __init opal_sys_param_init(void)
/* If the parameter is read-only or read-write */
switch (perm[i] & 3) {
case OPAL_SYSPARAM_READ:
- attr[i].kobj_attr.attr.mode = S_IRUGO;
+ attr[i].kobj_attr.attr.mode = 0444;
break;
case OPAL_SYSPARAM_WRITE:
- attr[i].kobj_attr.attr.mode = S_IWUSR;
+ attr[i].kobj_attr.attr.mode = 0200;
break;
case OPAL_SYSPARAM_RW:
- attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
+ attr[i].kobj_attr.attr.mode = 0644;
break;
default:
break;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..1b2936ba6040 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,6 @@ OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
+OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
+OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 041ddbd1fc57..c15182765ff5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node,
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
powerpc_firmware_features |= FW_FEATURE_OPAL;
- pr_info("OPAL detected !\n");
+ pr_debug("OPAL detected !\n");
} else {
panic("OPAL != V3 detected, no longer supported.\n");
}
@@ -239,8 +239,8 @@ int opal_message_notifier_register(enum opal_msg_type msg_type,
struct notifier_block *nb)
{
if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
- pr_warning("%s: Invalid arguments, msg_type:%d\n",
- __func__, msg_type);
+ pr_warn("%s: Invalid arguments, msg_type:%d\n",
+ __func__, msg_type);
return -EINVAL;
}
@@ -281,8 +281,8 @@ static void opal_handle_message(void)
/* check for errors. */
if (ret) {
- pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
- __func__, ret);
+ pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
+ __func__, ret);
return;
}
@@ -461,24 +461,14 @@ static int opal_recover_mce(struct pt_regs *regs,
void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
{
- /*
- * This is mostly taken from kernel/panic.c, but tries to do
- * relatively minimal work. Don't use delay functions (TB may
- * be broken), don't crash dump (need to set a firmware log),
- * don't run notifiers. We do want to get some information to
- * Linux console.
- */
- console_verbose();
- bust_spinlocks(1);
+ panic_flush_kmsg_start();
+
pr_emerg("Hardware platform error: %s\n", msg);
if (regs)
show_regs(regs);
smp_send_stop();
- printk_safe_flush_on_panic();
- kmsg_dump(KMSG_DUMP_PANIC);
- bust_spinlocks(0);
- debug_locks_off();
- console_flush_on_panic();
+
+ panic_flush_kmsg_end();
/*
* Don't bother to shut things down because this will
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9582aeb1fe4c..496e47696ed0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -54,7 +54,8 @@
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
#define POWERNV_IOMMU_MAX_LEVELS 5
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
+ "NPU_OCAPI" };
static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -89,6 +90,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
}
static bool pnv_iommu_bypass_disabled __read_mostly;
+static bool pci_reset_phbs __read_mostly;
static int __init iommu_setup(char *str)
{
@@ -110,6 +112,14 @@ static int __init iommu_setup(char *str)
}
early_param("iommu", iommu_setup);
+static int __init pci_reset_phbs_setup(char *str)
+{
+ pci_reset_phbs = true;
+ return 0;
+}
+
+early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
+
static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
/*
@@ -924,7 +934,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
* Configure PELTV. NPUs don't have a PELTV table so skip
* configuration on them.
*/
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_set_peltv(phb, pe, true);
/* Setup reverse map */
@@ -1059,8 +1069,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
pe = pnv_ioda_alloc_pe(phb);
if (!pe) {
- pr_warning("%s: Not enough PE# available, disabling device\n",
- pci_name(dev));
+ pr_warn("%s: Not enough PE# available, disabling device\n",
+ pci_name(dev));
return NULL;
}
@@ -1072,7 +1082,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
* At some point we want to remove the PDN completely anyways
*/
pci_dev_get(dev);
- pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
@@ -1119,7 +1128,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
continue;
pe->device_count++;
- pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_same_PE(dev->subordinate, pe);
@@ -1164,7 +1172,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe = pnv_ioda_alloc_pe(phb);
if (!pe) {
- pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+ pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number);
return NULL;
}
@@ -1234,7 +1242,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
pci_dev_get(npu_pdev);
npu_pdn = pci_get_pdn(npu_pdev);
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
- npu_pdn->pcidev = npu_pdev;
npu_pdn->pe_number = pe_num;
phb->ioda.pe_rmap[rid] = pe->pe_number;
@@ -1272,16 +1279,23 @@ static void pnv_pci_ioda_setup_PEs(void)
{
struct pci_controller *hose, *tmp;
struct pnv_phb *phb;
+ struct pci_bus *bus;
+ struct pci_dev *pdev;
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
- if (phb->type == PNV_PHB_NPU) {
+ if (phb->type == PNV_PHB_NPU_NVLINK) {
/* PE#0 is needed for error reporting */
pnv_ioda_reserve_pe(phb, 0);
pnv_ioda_setup_npu_PEs(hose->bus);
if (phb->model == PNV_PHB_MODEL_NPU2)
pnv_npu2_init(phb);
}
+ if (phb->type == PNV_PHB_NPU_OCAPI) {
+ bus = hose->bus;
+ list_for_each_entry(pdev, &bus->devices, bus_list)
+ pnv_ioda_setup_dev_PE(pdev);
+ }
}
}
@@ -1692,7 +1706,7 @@ m64_failed:
return ret;
}
-int pcibios_sriov_disable(struct pci_dev *pdev)
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
{
pnv_pci_sriov_disable(pdev);
@@ -1701,7 +1715,7 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
return 0;
}
-int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
/* Allocate PCI data */
add_dev_pci_data(pdev);
@@ -2572,7 +2586,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
unsigned long direct_table_size;
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
- (window_size > memory_hotplug_max()) ||
!is_power_of_2(window_size))
return 0;
@@ -2640,7 +2653,7 @@ static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
hose = pci_bus_to_host(pdev->bus);
phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK)
return 0;
*ptmppe = &phb->ioda.pe_array[pdn->pe_number];
@@ -2724,7 +2737,7 @@ static void pnv_pci_ioda_setup_iommu_api(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK)
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
@@ -3293,7 +3306,7 @@ static void pnv_pci_ioda_create_dbgfs(void)
sprintf(name, "PCI%04x", hose->global_number);
phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
if (!phb->dbgfs) {
- pr_warning("%s: Error on creating debugfs on PHB#%x\n",
+ pr_warn("%s: Error on creating debugfs on PHB#%x\n",
__func__, hose->global_number);
continue;
}
@@ -3774,6 +3787,13 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.shutdown = pnv_pci_ioda_shutdown,
};
+static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .shutdown = pnv_pci_ioda_shutdown,
+};
+
#ifdef CONFIG_CXL_BASE
const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
@@ -4007,9 +4027,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- if (phb->type == PNV_PHB_NPU) {
+ switch (phb->type) {
+ case PNV_PHB_NPU_NVLINK:
hose->controller_ops = pnv_npu_ioda_controller_ops;
- } else {
+ break;
+ case PNV_PHB_NPU_OCAPI:
+ hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
+ break;
+ default:
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
}
@@ -4019,6 +4044,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+ ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
+ ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
#endif
pci_add_flags(PCI_REASSIGN_ALL_RSRC);
@@ -4026,15 +4053,16 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Reset IODA tables to a clean state */
rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
if (rc)
- pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
+ pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc);
/*
* If we're running in kdump kernel, the previous kernel never
* shutdown PCI devices correctly. We already got IODA table
* cleaned out. So we have to issue PHB reset to stop all PCI
- * transactions from previous kernel.
+ * transactions from previous kernel. The ppc_pci_reset_phbs
+ * kernel parameter will force this reset too.
*/
- if (is_kdump_kernel()) {
+ if (is_kdump_kernel() || pci_reset_phbs) {
pr_info(" Issue PHB reset ...\n");
pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
@@ -4052,8 +4080,26 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
void __init pnv_pci_init_npu_phb(struct device_node *np)
{
- pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU);
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
+}
+
+void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
+}
+
+static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ if (!machine_is(powernv))
+ return;
+
+ if (phb->type == PNV_PHB_NPU_OCAPI)
+ dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 5422f4a6317c..69d102cbf48f 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1142,6 +1142,10 @@ void __init pnv_pci_init(void)
for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
pnv_pci_init_npu_phb(np);
+ /* Look for NPU2 OpenCAPI PHBs */
+ for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
+ pnv_pci_init_npu2_opencapi_phb(np);
+
/* Configure IOMMU DMA hooks */
set_pci_dma_ops(&dma_iommu_ops);
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b772d7473896..eada4b6068cb 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -12,9 +12,10 @@ struct pci_dn;
#define NV_NMMU_ATSD_REGS 8
enum pnv_phb_type {
- PNV_PHB_IODA1 = 0,
- PNV_PHB_IODA2 = 1,
- PNV_PHB_NPU = 2,
+ PNV_PHB_IODA1 = 0,
+ PNV_PHB_IODA2 = 1,
+ PNV_PHB_NPU_NVLINK = 2,
+ PNV_PHB_NPU_OCAPI = 3,
};
/* Precise PHB model for error management */
@@ -227,6 +228,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
+extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index ba030669eca1..9664c8461f03 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -37,6 +37,8 @@
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
+#include <asm/kexec.h>
+#include <asm/reg.h>
#include "powernv.h"
@@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void)
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+ } else if ((srr1 & wmask) == SRR1_WAKERESET) {
+ irq_set_pending_from_srr1(srr1);
+ /* Does not return */
}
+
smp_mb();
+ /*
+ * For kdump kernels, we process the ipi and jump to
+ * crash_ipi_callback
+ */
+ if (kdump_in_progress()) {
+ /*
+ * If we got to this point, we've not used
+ * NMI's, otherwise we would have gone
+ * via the SRR1_WAKERESET path. We are
+ * using regular IPI's for waking up offline
+ * threads.
+ */
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ crash_ipi_callback(&regs);
+ /* Does not return */
+ }
+
if (cpu_core_split_required())
continue;
@@ -371,5 +396,8 @@ void __init pnv_smp_init(void)
#ifdef CONFIG_HOTPLUG_CPU
ppc_md.cpu_die = pnv_smp_cpu_kill_self;
+#ifdef CONFIG_KEXEC_CORE
+ crash_wake_offline = 1;
+#endif
#endif
}
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index e48462447ff0..e7075aaff1bb 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -663,8 +663,8 @@ static void ps3_find_and_add_device(u64 bus_id, u64 dev_id)
if (rem)
break;
}
- pr_warning("%s:%u: device %llu:%llu not found\n", __func__, __LINE__,
- bus_id, dev_id);
+ pr_warn("%s:%u: device %llu:%llu not found\n",
+ __func__, __LINE__, bus_id, dev_id);
return;
found:
@@ -859,11 +859,9 @@ static int ps3_probe_thread(void *data)
if (notify_event->event_type != notify_region_probe ||
notify_event->bus_id != dev.sbd.bus_id) {
- pr_warning("%s:%u: bad notify_event: event %llu, "
- "dev_id %llu, dev_type %llu\n",
- __func__, __LINE__, notify_event->event_type,
- notify_event->dev_id,
- notify_event->dev_type);
+ pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n",
+ __func__, __LINE__, notify_event->event_type,
+ notify_event->dev_id, notify_event->dev_type);
continue;
}
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index b0f34663b1ae..7f870ec29daf 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -607,8 +607,8 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
r->ioid,
iopte_flag);
if (result) {
- pr_warning("%s:%d: lv1_put_iopte failed: %s\n",
- __func__, __LINE__, ps3_result(result));
+ pr_warn("%s:%d: lv1_put_iopte failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
goto fail_map;
}
DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__,
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index 3db53e8aff92..cdbfc5cfd6f3 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -699,7 +699,7 @@ static void os_area_queue_work_handler(struct work_struct *work)
error = update_flash_db();
if (error)
- pr_warning("%s: Could not update FLASH ROM\n", __func__);
+ pr_warn("%s: Could not update FLASH ROM\n", __func__);
pr_debug(" <- %s:%d\n", __func__, __LINE__);
}
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 6244bc849469..77a37520068d 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -113,6 +113,7 @@ static void ps3_panic(char *str)
printk(" System does not reboot automatically.\n");
printk(" Please press POWER button.\n");
printk("\n");
+ panic_flush_kmsg_end();
while(1)
lv1_pause(1);
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 560aefde06c0..25427a48feae 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -72,20 +72,20 @@ MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);
-module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
+module_param_named(delay, delay, uint, 0644);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
"[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
-module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
"before loaning resumes. "
"[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
-module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
+module_param_named(oom_kb, oom_kb, uint, 0644);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
"[Default=" __stringify(CMM_OOM_KB) "]");
-module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
+module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
"[Default=" __stringify(CMM_MIN_MEM_MB) "]");
-module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
"[Default=" __stringify(CMM_DEBUG) "]");
@@ -385,7 +385,7 @@ static int cmm_thread(void *dummy)
{ \
return sprintf(buf, format, ##args); \
} \
- static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, 0444, show_##name, NULL)
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
@@ -411,7 +411,7 @@ static ssize_t store_oom_pages(struct device *dev,
return count;
}
-static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(oom_freed_kb, 0644,
show_oom_pages, store_oom_pages);
static struct device_attribute *cmm_attrs[] = {
@@ -765,7 +765,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
}
module_param_call(disable, cmm_set_disable, param_get_uint,
- &cmm_disabled, S_IRUGO | S_IWUSR);
+ &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
"[Default=" __stringify(CMM_DISABLE) "]");
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 6b812ad990e4..823cb27efa8b 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -55,6 +55,43 @@ static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_pe;
+#ifdef CONFIG_PCI_IOV
+void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pci_dn *physfn_pdn;
+ struct eeh_dev *edev;
+
+ if (!pdev->is_virtfn)
+ return;
+
+ pdn->device_id = pdev->device;
+ pdn->vendor_id = pdev->vendor;
+ pdn->class_code = pdev->class;
+ /*
+ * Last allow unfreeze return code used for retrieval
+ * by user space in eeh-sysfs to show the last command
+ * completion from platform.
+ */
+ pdn->last_allow_rc = 0;
+ physfn_pdn = pci_get_pdn(pdev->physfn);
+ pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
+ edev = pdn_to_eeh_dev(pdn);
+
+ /*
+ * The following operations will fail if VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
+ eeh_add_to_parent_pe(edev); /* Add as VF PE type */
+ eeh_sysfs_add_device(pdev);
+
+}
+#endif
+
/*
* Buffer for reporting slot-error-detail rtas calls. Its here
* in BSS, and not dynamically alloced, so that it ends up in
@@ -120,6 +157,11 @@ static int pseries_eeh_init(void)
/* Set EEH probe mode */
eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+#ifdef CONFIG_PCI_IOV
+ /* Set EEH machine dependent code */
+ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+#endif
+
return 0;
}
@@ -684,6 +726,121 @@ static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32
return rtas_write_config(pdn, where, size, val);
}
+static int pseries_eeh_restore_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ s64 ret = 0;
+
+ if (!edev)
+ return -EEXIST;
+
+ /*
+ * FIXME: The MPS, error routing rules, timeout setting are worthy
+ * to be exported by firmware in extendible way.
+ */
+ if (edev->physfn)
+ ret = eeh_restore_vf_config(pdn);
+
+ if (ret) {
+ pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
+ __func__, edev->pe_config_addr, ret);
+ return -EIO;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_PCI_IOV
+int pseries_send_allow_unfreeze(struct pci_dn *pdn,
+ u16 *vf_pe_array, int cur_vfs)
+{
+ int rc;
+ int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze");
+ unsigned long buid, addr;
+
+ addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ buid = pdn->phb->buid;
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE);
+ rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL,
+ addr,
+ BUID_HI(buid),
+ BUID_LO(buid),
+ rtas_data_buf, cur_vfs * sizeof(u16));
+ spin_unlock(&rtas_data_buf_lock);
+ if (rc)
+ pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n",
+ __func__,
+ pdn->phb->global_number, addr, rc);
+ return rc;
+}
+
+static int pseries_call_allow_unfreeze(struct eeh_dev *edev)
+{
+ struct pci_dn *pdn, *tmp, *parent, *physfn_pdn;
+ int cur_vfs = 0, rc = 0, vf_index, bus, devfn;
+ u16 *vf_pe_array;
+
+ vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!vf_pe_array)
+ return -ENOMEM;
+ if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) {
+ if (edev->pdev->is_physfn) {
+ cur_vfs = pci_num_vf(edev->pdev);
+ pdn = eeh_dev_to_pdn(edev);
+ parent = pdn->parent;
+ for (vf_index = 0; vf_index < cur_vfs; vf_index++)
+ vf_pe_array[vf_index] =
+ cpu_to_be16(pdn->pe_num_map[vf_index]);
+ rc = pseries_send_allow_unfreeze(pdn, vf_pe_array,
+ cur_vfs);
+ pdn->last_allow_rc = rc;
+ for (vf_index = 0; vf_index < cur_vfs; vf_index++) {
+ list_for_each_entry_safe(pdn, tmp,
+ &parent->child_list,
+ list) {
+ bus = pci_iov_virtfn_bus(edev->pdev,
+ vf_index);
+ devfn = pci_iov_virtfn_devfn(edev->pdev,
+ vf_index);
+ if (pdn->busno != bus ||
+ pdn->devfn != devfn)
+ continue;
+ pdn->last_allow_rc = rc;
+ }
+ }
+ } else {
+ pdn = pci_get_pdn(edev->pdev);
+ vf_pe_array[0] = cpu_to_be16(pdn->pe_number);
+ physfn_pdn = pci_get_pdn(edev->physfn);
+ rc = pseries_send_allow_unfreeze(physfn_pdn,
+ vf_pe_array, 1);
+ pdn->last_allow_rc = rc;
+ }
+ }
+
+ kfree(vf_pe_array);
+ return rc;
+}
+
+static int pseries_notify_resume(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+ if (!edev)
+ return -EEXIST;
+
+ if (rtas_token("ibm,open-sriov-allow-unfreeze")
+ == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ if (edev->pdev->is_physfn || edev->pdev->is_virtfn)
+ return pseries_call_allow_unfreeze(edev);
+
+ return 0;
+}
+#endif
+
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
.init = pseries_eeh_init,
@@ -699,7 +856,10 @@ static struct eeh_ops pseries_eeh_ops = {
.read_config = pseries_eeh_read_config,
.write_config = pseries_eeh_write_config,
.next_error = NULL,
- .restore_config = NULL
+ .restore_config = pseries_eeh_restore_config,
+#ifdef CONFIG_PCI_IOV
+ .notify_resume = pseries_notify_resume
+#endif
};
/**
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 63cc82ad58ac..a3bbeb43689e 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -114,6 +114,8 @@ static __initdata struct vec5_fw_feature
vec5_fw_features_table[] = {
{FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
{FW_FEATURE_PRRN, OV5_PRRN},
+ {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2},
+ {FW_FEATURE_DRC_INFO, OV5_DRC_INFO},
};
static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index a7d14aa7bb7c..dceb51454d8d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -340,6 +340,8 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
+extern int find_and_online_cpu_nid(int cpu);
+
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
@@ -364,6 +366,7 @@ static int dlpar_online_cpu(struct device_node *dn)
!= CPU_STATE_OFFLINE);
cpu_maps_update_done();
timed_topology_update(1);
+ find_and_online_cpu_nid(cpu);
rc = device_online(get_cpu_device(cpu));
if (rc)
goto out;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 1d48ab424bd9..c1578f54c626 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -23,6 +23,7 @@
#include <asm/prom.h>
#include <asm/sparsemem.h>
#include <asm/fadump.h>
+#include <asm/drmem.h>
#include "pseries.h"
static bool rtas_hp_event;
@@ -100,100 +101,6 @@ static struct property *dlpar_clone_property(struct property *prop,
return new_prop;
}
-static struct property *dlpar_clone_drconf_property(struct device_node *dn)
-{
- struct property *prop, *new_prop;
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i;
-
- prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
- if (!prop)
- return NULL;
-
- new_prop = dlpar_clone_property(prop, prop->length);
- if (!new_prop)
- return NULL;
-
- /* Convert the property to cpu endian-ness */
- p = new_prop->value;
- *p = be32_to_cpu(*p);
-
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
- for (i = 0; i < num_lmbs; i++) {
- lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
- lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
- lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
- lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
- }
-
- return new_prop;
-}
-
-static void dlpar_update_drconf_property(struct device_node *dn,
- struct property *prop)
-{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i;
-
- /* Convert the property back to BE */
- p = prop->value;
- num_lmbs = *p;
- *p = cpu_to_be32(*p);
- p++;
-
- lmbs = (struct of_drconf_cell *)p;
- for (i = 0; i < num_lmbs; i++) {
- lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
- lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
- lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
- lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
- }
-
- rtas_hp_event = true;
- of_update_property(dn, prop);
- rtas_hp_event = false;
-}
-
-static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb)
-{
- struct device_node *dn;
- struct property *prop;
- struct of_drconf_cell *lmbs;
- u32 *p, num_lmbs;
- int i;
-
- dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (!dn)
- return -ENODEV;
-
- prop = dlpar_clone_drconf_property(dn);
- if (!prop) {
- of_node_put(dn);
- return -ENODEV;
- }
-
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == lmb->drc_index) {
- lmbs[i].flags = lmb->flags;
- lmbs[i].aa_index = lmb->aa_index;
-
- dlpar_update_drconf_property(dn, prop);
- break;
- }
- }
-
- of_node_put(dn);
- return 0;
-}
-
static u32 find_aa_index(struct device_node *dr_node,
struct property *ala_prop, const u32 *lmb_assoc)
{
@@ -256,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node,
return aa_index;
}
-static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
+static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
{
struct device_node *parent, *lmb_node, *dr_node;
struct property *ala_prop;
@@ -299,9 +206,9 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
return aa_index;
}
-static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
{
- int aa_index;
+ int rc, aa_index;
lmb->flags |= DRCONF_MEM_ASSIGNED;
@@ -313,17 +220,29 @@ static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
}
lmb->aa_index = aa_index;
- return dlpar_update_device_tree_lmb(lmb);
+
+ rtas_hp_event = true;
+ rc = drmem_update_dt();
+ rtas_hp_event = false;
+
+ return rc;
}
-static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
{
+ int rc;
+
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
lmb->aa_index = 0xffffffff;
- return dlpar_update_device_tree_lmb(lmb);
+
+ rtas_hp_event = true;
+ rc = drmem_update_dt();
+ rtas_hp_event = false;
+
+ return rc;
}
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
{
unsigned long section_nr;
struct mem_section *mem_sect;
@@ -336,7 +255,36 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
return mem_block;
}
-static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
+static int get_lmb_range(u32 drc_index, int n_lmbs,
+ struct drmem_lmb **start_lmb,
+ struct drmem_lmb **end_lmb)
+{
+ struct drmem_lmb *lmb, *start, *end;
+ struct drmem_lmb *last_lmb;
+
+ start = NULL;
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
+ start = lmb;
+ break;
+ }
+ }
+
+ if (!start)
+ return -EINVAL;
+
+ end = &start[n_lmbs - 1];
+
+ last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+ if (end > last_lmb)
+ return -EINVAL;
+
+ *start_lmb = start;
+ *end_lmb = end;
+ return 0;
+}
+
+static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online)
{
struct memory_block *mem_block;
int rc;
@@ -357,13 +305,13 @@ static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
return rc;
}
-static int dlpar_online_lmb(struct of_drconf_cell *lmb)
+static int dlpar_online_lmb(struct drmem_lmb *lmb)
{
return dlpar_change_lmb_state(lmb, true);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
+static int dlpar_offline_lmb(struct drmem_lmb *lmb)
{
return dlpar_change_lmb_state(lmb, false);
}
@@ -426,7 +374,7 @@ static int pseries_remove_mem_node(struct device_node *np)
return 0;
}
-static bool lmb_is_removable(struct of_drconf_cell *lmb)
+static bool lmb_is_removable(struct drmem_lmb *lmb)
{
int i, scns_per_block;
int rc = 1;
@@ -458,9 +406,9 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
return rc ? true : false;
}
-static int dlpar_add_lmb(struct of_drconf_cell *);
+static int dlpar_add_lmb(struct drmem_lmb *);
-static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
int nid, rc;
@@ -484,28 +432,25 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
return 0;
}
-static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
- struct property *prop)
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
{
- struct of_drconf_cell *lmbs;
+ struct drmem_lmb *lmb;
int lmbs_removed = 0;
int lmbs_available = 0;
- u32 num_lmbs, *p;
- int i, rc;
+ int rc;
pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove);
if (lmbs_to_remove == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
/* Validate that there are enough LMBs to satisfy the request */
- for (i = 0; i < num_lmbs; i++) {
- if (lmb_is_removable(&lmbs[i]))
+ for_each_drmem_lmb(lmb) {
+ if (lmb_is_removable(lmb))
lmbs_available++;
+
+ if (lmbs_available == lmbs_to_remove)
+ break;
}
if (lmbs_available < lmbs_to_remove) {
@@ -514,45 +459,47 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
return -EINVAL;
}
- for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) {
- rc = dlpar_remove_lmb(&lmbs[i]);
+ for_each_drmem_lmb(lmb) {
+ rc = dlpar_remove_lmb(lmb);
if (rc)
continue;
- lmbs_removed++;
-
/* Mark this lmb so we can add it later if all of the
* requested LMBs cannot be removed.
*/
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
+
+ lmbs_removed++;
+ if (lmbs_removed == lmbs_to_remove)
+ break;
}
if (lmbs_removed != lmbs_to_remove) {
pr_err("Memory hot-remove failed, adding LMB's back\n");
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
pr_err("Failed to add LMB back, drc index %x\n",
- lmbs[i].drc_index);
+ lmb->drc_index);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
pr_info("Memory at %llx was hot-removed\n",
- lmbs[i].base_addr);
+ lmb->base_addr);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
rc = 0;
}
@@ -560,26 +507,21 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
return rc;
}
-static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_remove_by_index(u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
+ struct drmem_lmb *lmb;
int lmb_found;
- int i, rc;
+ int rc;
pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index);
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
lmb_found = 0;
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == drc_index) {
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
lmb_found = 1;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (!rc)
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
break;
}
@@ -590,35 +532,30 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
if (rc)
pr_info("Failed to hot-remove memory at %llx\n",
- lmbs[i].base_addr);
+ lmb->base_addr);
else
- pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr);
+ pr_info("Memory at %llx was hot-removed\n", lmb->base_addr);
return rc;
}
-static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_readd_by_index(u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
+ struct drmem_lmb *lmb;
int lmb_found;
- int i, rc;
+ int rc;
pr_info("Attempting to update LMB, drc index %x\n", drc_index);
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
lmb_found = 0;
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == drc_index) {
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
lmb_found = 1;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (!rc) {
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
}
break;
}
@@ -629,20 +566,18 @@ static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
if (rc)
pr_info("Failed to update memory at %llx\n",
- lmbs[i].base_addr);
+ lmb->base_addr);
else
- pr_info("Memory at %llx was updated\n", lmbs[i].base_addr);
+ pr_info("Memory at %llx was updated\n", lmb->base_addr);
return rc;
}
-static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
- struct property *prop)
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i, rc, start_lmb_found;
- int lmbs_available = 0, start_index = 0, end_index;
+ struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+ int lmbs_available = 0;
+ int rc;
pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
lmbs_to_remove, drc_index);
@@ -650,29 +585,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
if (lmbs_to_remove == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
- start_lmb_found = 0;
-
- /* Navigate to drc_index */
- while (start_index < num_lmbs) {
- if (lmbs[start_index].drc_index == drc_index) {
- start_lmb_found = 1;
- break;
- }
-
- start_index++;
- }
-
- if (!start_lmb_found)
+ rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb);
+ if (rc)
return -EINVAL;
- end_index = start_index + lmbs_to_remove;
-
/* Validate that there are enough LMBs to satisfy the request */
- for (i = start_index; i < end_index; i++) {
- if (lmbs[i].flags & DRCONF_MEM_RESERVED)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED)
break;
lmbs_available++;
@@ -681,42 +600,43 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
if (lmbs_available < lmbs_to_remove)
return -EINVAL;
- for (i = start_index; i < end_index; i++) {
- if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (rc)
break;
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
}
if (rc) {
pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n");
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
pr_err("Failed to add LMB, drc index %x\n",
- be32_to_cpu(lmbs[i].drc_index));
+ lmb->drc_index);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
pr_info("Memory at %llx (drc index %x) was hot-removed\n",
- lmbs[i].base_addr, lmbs[i].drc_index);
+ lmb->base_addr, lmb->drc_index);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
}
@@ -737,32 +657,30 @@ static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
{
return -EOPNOTSUPP;
}
-static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
- struct property *prop)
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_remove_by_index(u32 drc_index)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_readd_by_index(u32 drc_index)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
- struct property *prop)
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
int nid, rc;
@@ -801,77 +719,79 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
return rc;
}
-static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
+static int dlpar_memory_add_by_count(u32 lmbs_to_add)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
+ struct drmem_lmb *lmb;
int lmbs_available = 0;
int lmbs_added = 0;
- int i, rc;
+ int rc;
pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
if (lmbs_to_add == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
/* Validate that there are enough LMBs to satisfy the request */
- for (i = 0; i < num_lmbs; i++) {
- if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
+ for_each_drmem_lmb(lmb) {
+ if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
lmbs_available++;
+
+ if (lmbs_available == lmbs_to_add)
+ break;
}
if (lmbs_available < lmbs_to_add)
return -EINVAL;
- for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
- if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+ for_each_drmem_lmb(lmb) {
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
continue;
- rc = dlpar_acquire_drc(lmbs[i].drc_index);
+ rc = dlpar_acquire_drc(lmb->drc_index);
if (rc)
continue;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc) {
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
continue;
}
- lmbs_added++;
-
/* Mark this lmb so we can remove it later if all of the
* requested LMBs cannot be added.
*/
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
+
+ lmbs_added++;
+ if (lmbs_added == lmbs_to_add)
+ break;
}
if (lmbs_added != lmbs_to_add) {
pr_err("Memory hot-add failed, removing any added LMBs\n");
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (rc)
pr_err("Failed to remove LMB, drc index %x\n",
- be32_to_cpu(lmbs[i].drc_index));
+ lmb->drc_index);
else
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
pr_info("Memory at %llx (drc index %x) was hot-added\n",
- lmbs[i].base_addr, lmbs[i].drc_index);
- lmbs[i].reserved = 0;
+ lmb->base_addr, lmb->drc_index);
+ drmem_remove_lmb_reservation(lmb);
}
rc = 0;
}
@@ -879,28 +799,22 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
return rc;
}
-static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_add_by_index(u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i, lmb_found;
- int rc;
+ struct drmem_lmb *lmb;
+ int rc, lmb_found;
pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
lmb_found = 0;
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == drc_index) {
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
lmb_found = 1;
- rc = dlpar_acquire_drc(lmbs[i].drc_index);
+ rc = dlpar_acquire_drc(lmb->drc_index);
if (!rc) {
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
}
break;
@@ -914,18 +828,16 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
else
pr_info("Memory at %llx (drc index %x) was hot-added\n",
- lmbs[i].base_addr, drc_index);
+ lmb->base_addr, drc_index);
return rc;
}
-static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
- struct property *prop)
+static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i, rc, start_lmb_found;
- int lmbs_available = 0, start_index = 0, end_index;
+ struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+ int lmbs_available = 0;
+ int rc;
pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
lmbs_to_add, drc_index);
@@ -933,29 +845,13 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
if (lmbs_to_add == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
- start_lmb_found = 0;
-
- /* Navigate to drc_index */
- while (start_index < num_lmbs) {
- if (lmbs[start_index].drc_index == drc_index) {
- start_lmb_found = 1;
- break;
- }
-
- start_index++;
- }
-
- if (!start_lmb_found)
+ rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb);
+ if (rc)
return -EINVAL;
- end_index = start_index + lmbs_to_add;
-
/* Validate that the LMBs in this range are not reserved */
- for (i = start_index; i < end_index; i++) {
- if (lmbs[i].flags & DRCONF_MEM_RESERVED)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED)
break;
lmbs_available++;
@@ -964,46 +860,48 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
if (lmbs_available < lmbs_to_add)
return -EINVAL;
- for (i = start_index; i < end_index; i++) {
- if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
continue;
- rc = dlpar_acquire_drc(lmbs[i].drc_index);
+ rc = dlpar_acquire_drc(lmb->drc_index);
if (rc)
break;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc) {
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
break;
}
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
}
if (rc) {
pr_err("Memory indexed-count-add failed, removing any added LMBs\n");
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (rc)
pr_err("Failed to remove LMB, drc index %x\n",
- be32_to_cpu(lmbs[i].drc_index));
+ lmb->drc_index);
else
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
pr_info("Memory at %llx (drc index %x) was hot-added\n",
- lmbs[i].base_addr, lmbs[i].drc_index);
- lmbs[i].reserved = 0;
+ lmb->base_addr, lmb->drc_index);
+ drmem_remove_lmb_reservation(lmb);
}
}
@@ -1012,37 +910,23 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
{
- struct device_node *dn;
- struct property *prop;
u32 count, drc_index;
int rc;
lock_device_hotplug();
- dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (!dn) {
- rc = -EINVAL;
- goto dlpar_memory_out;
- }
-
- prop = dlpar_clone_drconf_property(dn);
- if (!prop) {
- rc = -EINVAL;
- goto dlpar_memory_out;
- }
-
switch (hp_elog->action) {
case PSERIES_HP_ELOG_ACTION_ADD:
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
count = hp_elog->_drc_u.drc_count;
- rc = dlpar_memory_add_by_count(count, prop);
+ rc = dlpar_memory_add_by_count(count);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
drc_index = hp_elog->_drc_u.drc_index;
- rc = dlpar_memory_add_by_index(drc_index, prop);
+ rc = dlpar_memory_add_by_index(drc_index);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
- rc = dlpar_memory_add_by_ic(count, drc_index, prop);
+ rc = dlpar_memory_add_by_ic(count, drc_index);
} else {
rc = -EINVAL;
}
@@ -1051,14 +935,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
case PSERIES_HP_ELOG_ACTION_REMOVE:
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
count = hp_elog->_drc_u.drc_count;
- rc = dlpar_memory_remove_by_count(count, prop);
+ rc = dlpar_memory_remove_by_count(count);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
drc_index = hp_elog->_drc_u.drc_index;
- rc = dlpar_memory_remove_by_index(drc_index, prop);
+ rc = dlpar_memory_remove_by_index(drc_index);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
- rc = dlpar_memory_remove_by_ic(count, drc_index, prop);
+ rc = dlpar_memory_remove_by_ic(count, drc_index);
} else {
rc = -EINVAL;
}
@@ -1066,7 +950,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
break;
case PSERIES_HP_ELOG_ACTION_READD:
drc_index = hp_elog->_drc_u.drc_index;
- rc = dlpar_memory_readd_by_index(drc_index, prop);
+ rc = dlpar_memory_readd_by_index(drc_index);
break;
default:
pr_err("Invalid action (%d) specified\n", hp_elog->action);
@@ -1074,10 +958,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
break;
}
- dlpar_free_property(prop);
-
-dlpar_memory_out:
- of_node_put(dn);
unlock_device_hotplug();
return rc;
}
@@ -1116,7 +996,7 @@ static int pseries_add_mem_node(struct device_node *np)
static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
{
- struct of_drconf_cell *new_drmem, *old_drmem;
+ struct of_drconf_cell_v1 *new_drmem, *old_drmem;
unsigned long memblock_size;
u32 entries;
__be32 *p;
@@ -1139,11 +1019,11 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
* of_drconf_cell's.
*/
entries = be32_to_cpu(*p++);
- old_drmem = (struct of_drconf_cell *)p;
+ old_drmem = (struct of_drconf_cell_v1 *)p;
p = (__be32 *)pr->prop->value;
p++;
- new_drmem = (struct of_drconf_cell *)p;
+ new_drmem = (struct of_drconf_cell_v1 *)p;
for (i = 0; i < entries; i++) {
if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) &&
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 957ae347b0b3..89b7ce807e70 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -163,7 +163,7 @@ static int __init hcall_inst_init(void)
for_each_possible_cpu(cpu) {
snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
- hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO,
+ hcall_file = debugfs_create_file(cpu_name_buf, 0444,
hcall_root,
per_cpu(hcall_stats, cpu),
&hcall_inst_seq_fops);
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 408a86044133..c7c1140c13b6 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -298,7 +298,7 @@ out:
return rc;
return count;
}
-static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe);
+static BUS_ATTR(probe, 0200, NULL, ibmebus_store_probe);
static ssize_t ibmebus_store_remove(struct bus_type *bus,
const char *buf, size_t count)
@@ -325,7 +325,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus,
return -ENODEV;
}
}
-static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove);
+static BUS_ATTR(remove, 0200, NULL, ibmebus_store_remove);
static struct attribute *ibmbus_bus_attrs[] = {
&bus_attr_probe.attr,
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index eaa11334fc8c..06f02960b439 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -816,15 +816,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
ret = tce_clearrange_multi_pSeriesLP(0,
1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
if (ret)
- pr_warning("%pOF failed to clear tces in window.\n",
- np);
+ pr_warn("%pOF failed to clear tces in window.\n",
+ np);
else
pr_debug("%pOF successfully cleared tces in window.\n",
np);
ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
if (ret)
- pr_warning("%pOF: failed to remove direct window: rtas returned "
+ pr_warn("%pOF: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
np, ret, ddw_avail[2], liobn);
else
@@ -836,7 +836,7 @@ delprop:
if (remove_prop)
ret = of_remove_property(np, win64);
if (ret)
- pr_warning("%pOF: failed to remove direct window property: %d\n",
+ pr_warn("%pOF: failed to remove direct window property: %d\n",
np, ret);
}
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index b2706c483067..c508c938dc71 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -370,10 +370,10 @@ static void parse_system_parameter_string(struct seq_file *m)
*/
static int lparcfg_count_active_processors(void)
{
- struct device_node *cpus_dn = NULL;
+ struct device_node *cpus_dn;
int count = 0;
- while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+ for_each_node_by_type(cpus_dn, "cpu") {
#ifdef LPARCFG_DEBUG
printk(KERN_ERR "cpus_dn %p\n", cpus_dn);
#endif
@@ -697,11 +697,11 @@ static const struct file_operations lparcfg_fops = {
static int __init lparcfg_init(void)
{
- umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+ umode_t mode = 0444;
/* Allow writing if we have FW_FEATURE_SPLPAR */
if (firmware_has_feature(FW_FEATURE_SPLPAR))
- mode |= S_IWUSR;
+ mode |= 0200;
if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) {
printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index f7042ad492ba..0f7fb7170b03 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -384,7 +384,7 @@ static ssize_t migration_store(struct class *class,
#define MIGRATION_API_VERSION 1
static CLASS_ATTR_WO(migration);
-static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION));
+static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
static int __init mobility_sysfs_init(void)
{
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 7e75101fa522..6df192f38f80 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -3,6 +3,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <asm/prom.h>
#include "of_helpers.h"
@@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char *path)
kfree(parent_path);
return parent ? parent : ERR_PTR(-EINVAL);
}
+
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
+ struct of_drc_info *data)
+{
+ const char *p;
+ const __be32 *p2;
+
+ if (!data)
+ return -EINVAL;
+
+ /* Get drc-type:encode-string */
+ p = data->drc_type = (char*) (*curval);
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-name-prefix:encode-string */
+ data->drc_name_prefix = (char *)p;
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-index-start:encode-int */
+ p2 = (const __be32 *)p;
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get drc-name-suffix-start:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get number-sequential-elements:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get sequential-increment:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get drc-power-domain:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
+ if (!p2)
+ return -EINVAL;
+
+ /* Should now know end of current entry */
+ (*curval) = (void *)p2;
+ data->last_drc_index = data->drc_index_start +
+ ((data->num_sequential_elems - 1) * data->sequential_inc);
+
+ return 0;
+}
+EXPORT_SYMBOL(of_read_drc_info_cell);
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 09eba5a9929a..eab96637d6cf 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -3,17 +3,17 @@
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
*
* pSeries specific routines for PCI.
- *
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -54,10 +54,174 @@ void pcibios_name_device(struct pci_dev *dev)
}
}
}
-}
+}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
#endif
+#ifdef CONFIG_PCI_IOV
+#define MAX_VFS_FOR_MAP_PE 256
+struct pe_map_bar_entry {
+ __be64 bar; /* Input: Virtual Function BAR */
+ __be16 rid; /* Input: Virtual Function Router ID */
+ __be16 pe_num; /* Output: Virtual Function PE Number */
+ __be32 reserved; /* Reserved Space */
+};
+
+int pseries_send_map_pe(struct pci_dev *pdev,
+ u16 num_vfs,
+ struct pe_map_bar_entry *vf_pe_array)
+{
+ struct pci_dn *pdn;
+ int rc;
+ unsigned long buid, addr;
+ int ibm_map_pes = rtas_token("ibm,open-sriov-map-pe-number");
+
+ if (ibm_map_pes == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ pdn = pci_get_pdn(pdev);
+ addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ buid = pdn->phb->buid;
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, vf_pe_array,
+ RTAS_DATA_BUF_SIZE);
+ rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid),
+ rtas_data_buf,
+ num_vfs * sizeof(struct pe_map_bar_entry));
+ memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (rc)
+ dev_err(&pdev->dev,
+ "%s: Failed to associate pes PE#%lx, rc=%x\n",
+ __func__, addr, rc);
+
+ return rc;
+}
+
+void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num)
+{
+ struct pci_dn *pdn;
+
+ pdn = pci_get_pdn(pdev);
+ pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num);
+ dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n",
+ pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+ PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)),
+ pdn->pe_num_map[vf_index]);
+}
+
+int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs)
+{
+ struct pci_dn *pdn;
+ int i, rc, vf_index;
+ struct pe_map_bar_entry *vf_pe_array;
+ struct resource *res;
+ u64 size;
+
+ vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!vf_pe_array)
+ return -ENOMEM;
+
+ pdn = pci_get_pdn(pdev);
+ /* create firmware structure to associate pes */
+ for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+ pdn->pe_num_map[vf_index] = IODA_INVALID_PE;
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &pdev->resource[i + PCI_IOV_RESOURCES];
+ if (!res->parent)
+ continue;
+ size = pcibios_iov_resource_alignment(pdev, i +
+ PCI_IOV_RESOURCES);
+ vf_pe_array[vf_index].bar =
+ cpu_to_be64(res->start + size * vf_index);
+ vf_pe_array[vf_index].rid =
+ cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index)
+ << 8) | pci_iov_virtfn_devfn(pdev,
+ vf_index));
+ vf_pe_array[vf_index].pe_num =
+ cpu_to_be16(IODA_INVALID_PE);
+ }
+ }
+
+ rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array);
+ /* Only zero is success */
+ if (!rc)
+ for (vf_index = 0; vf_index < num_vfs; vf_index++)
+ pseries_set_pe_num(pdev, vf_index,
+ vf_pe_array[vf_index].pe_num);
+
+ kfree(vf_pe_array);
+ return rc;
+}
+
+int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ struct pci_dn *pdn;
+ int rc;
+ const int *max_vfs;
+ int max_config_vfs;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL);
+
+ if (!max_vfs)
+ return -EINVAL;
+
+ /* First integer stores max config */
+ max_config_vfs = of_read_number(&max_vfs[0], 1);
+ if (max_config_vfs < num_vfs && num_vfs > MAX_VFS_FOR_MAP_PE) {
+ dev_err(&pdev->dev,
+ "Num VFs %x > %x Configurable VFs\n",
+ num_vfs, (num_vfs > MAX_VFS_FOR_MAP_PE) ?
+ MAX_VFS_FOR_MAP_PE : max_config_vfs);
+ return -EINVAL;
+ }
+
+ pdn = pci_get_pdn(pdev);
+ pdn->pe_num_map = kmalloc_array(num_vfs,
+ sizeof(*pdn->pe_num_map),
+ GFP_KERNEL);
+ if (!pdn->pe_num_map)
+ return -ENOMEM;
+
+ rc = pseries_associate_pes(pdev, num_vfs);
+
+ /* Anything other than zero is failure */
+ if (rc) {
+ dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc);
+ kfree(pdn->pe_num_map);
+ } else {
+ pci_vf_drivers_autoprobe(pdev, false);
+ }
+
+ return rc;
+}
+
+int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ /* Allocate PCI data */
+ add_dev_pci_data(pdev);
+ return pseries_pci_sriov_enable(pdev, num_vfs);
+}
+
+int pseries_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn;
+
+ pdn = pci_get_pdn(pdev);
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
+ /* Release PCI data */
+ remove_dev_pci_data(pdev);
+ pci_vf_drivers_autoprobe(pdev, true);
+ return 0;
+}
+#endif
+
static void __init pSeries_request_regions(void)
{
if (!isa_io_base)
@@ -76,6 +240,11 @@ void __init pSeries_final_fixup(void)
pSeries_request_regions();
eeh_addr_cache_build();
+
+#ifdef CONFIG_PCI_IOV
+ ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
+ ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
+#endif
}
/*
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 35c891aabef0..6ed22127391b 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -22,6 +22,7 @@
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/firmware.h>
+#include <asm/prom.h>
#define MODULE_VERS "1.0"
@@ -38,26 +39,58 @@ static int sysfs_entries;
static u32 cpu_to_drc_index(int cpu)
{
struct device_node *dn = NULL;
- const int *indexes;
- int i;
+ int thread_index;
int rc = 1;
u32 ret = 0;
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
+
/* Convert logical cpu number to core number */
- i = cpu_core_index_of_thread(cpu);
- /*
- * The first element indexes[0] is the number of drc_indexes
- * returned in the list. Hence i+1 will get the drc_index
- * corresponding to core number i.
- */
- WARN_ON(i > indexes[0]);
- ret = indexes[i + 1];
+ thread_index = cpu_core_index_of_thread(cpu);
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+ struct property *info = NULL;
+ struct of_drc_info drc;
+ int j;
+ u32 num_set_entries;
+ const __be32 *value;
+
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info == NULL)
+ goto err_of_node_put;
+
+ value = of_prop_next_u32(info, NULL, &num_set_entries);
+ if (!value)
+ goto err_of_node_put;
+
+ for (j = 0; j < num_set_entries; j++) {
+
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ goto err;
+
+ if (thread_index < drc.last_drc_index)
+ break;
+ }
+
+ ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
+ } else {
+ const __be32 *indexes;
+
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+
+ /*
+ * The first element indexes[0] is the number of drc_indexes
+ * returned in the list. Hence thread_index+1 will get the
+ * drc_index corresponding to core number thread_index.
+ */
+ ret = indexes[thread_index + 1];
+ }
+
rc = 0;
err_of_node_put:
@@ -72,34 +105,71 @@ static int drc_index_to_cpu(u32 drc_index)
{
struct device_node *dn = NULL;
const int *indexes;
- int i, cpu = 0;
+ int thread_index = 0, cpu = 0;
int rc = 1;
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
- /*
- * First element in the array is the number of drc_indexes
- * returned. Search through the list to find the matching
- * drc_index and get the core number
- */
- for (i = 0; i < indexes[0]; i++) {
- if (indexes[i + 1] == drc_index)
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+ struct property *info = NULL;
+ struct of_drc_info drc;
+ int j;
+ u32 num_set_entries;
+ const __be32 *value;
+
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info == NULL)
+ goto err_of_node_put;
+
+ value = of_prop_next_u32(info, NULL, &num_set_entries);
+ if (!value)
+ goto err_of_node_put;
+
+ for (j = 0; j < num_set_entries; j++) {
+
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ goto err;
+
+ if (drc_index > drc.last_drc_index) {
+ cpu += drc.num_sequential_elems;
+ continue;
+ }
+ cpu += ((drc_index - drc.drc_index_start) /
+ drc.sequential_inc);
+
+ thread_index = cpu_first_thread_of_core(cpu);
+ rc = 0;
break;
+ }
+ } else {
+ unsigned long int i;
+
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+ /*
+ * First element in the array is the number of drc_indexes
+ * returned. Search through the list to find the matching
+ * drc_index and get the core number
+ */
+ for (i = 0; i < indexes[0]; i++) {
+ if (indexes[i + 1] == drc_index)
+ break;
+ }
+ /* Convert core number to logical cpu number */
+ thread_index = cpu_first_thread_of_core(i);
+ rc = 0;
}
- /* Convert core number to logical cpu number */
- cpu = cpu_first_thread_of_core(i);
- rc = 0;
err_of_node_put:
of_node_put(dn);
err:
if (rc)
printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
- return cpu;
+ return thread_index;
}
/*
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index f24d8159c9e1..0e0208117e77 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -405,7 +405,7 @@ static int proc_ppc64_create_ofdt(void)
{
struct proc_dir_entry *ent;
- ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops);
+ ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops);
if (ent)
proc_set_size(ent, 0);
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index c47585a78b69..054ce7a16fc3 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -179,7 +179,7 @@ static int __init scanlog_init(void)
if (!scanlog_buffer)
goto err;
- ent = proc_create("powerpc/rtas/scan-log-dump", S_IRUSR, NULL,
+ ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL,
&scanlog_fops);
if (!ent)
goto err;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ae4f596273b5..372d7ada1a0c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -371,8 +371,8 @@ void pseries_disable_reloc_on_exc(void)
mdelay(get_longbusy_msecs(rc));
}
if (rc != H_SUCCESS)
- pr_warning("Warning: Failed to disable relocation on "
- "exceptions: %ld\n", rc);
+ pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
+ rc);
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
@@ -492,6 +492,162 @@ static void pseries_setup_rfi_flush(void)
setup_rfi_flush(types, enable);
}
+#ifdef CONFIG_PCI_IOV
+enum rtas_iov_fw_value_map {
+ NUM_RES_PROPERTY = 0, /* Number of Resources */
+ LOW_INT = 1, /* Lowest 32 bits of Address */
+ START_OF_ENTRIES = 2, /* Always start of entry */
+ APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */
+ WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
+ NEXT_ENTRY = 7 /* Go to next entry on array */
+};
+
+enum get_iov_fw_value_index {
+ BAR_ADDRS = 1, /* Get Bar Address */
+ APERTURE_SIZE = 2, /* Get Aperture Size */
+ WDW_SIZE = 3 /* Get Window Size */
+};
+
+resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
+ enum get_iov_fw_value_index value)
+{
+ const int *indexes;
+ struct device_node *dn = pci_device_to_OF_node(dev);
+ int i, num_res, ret = 0;
+
+ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+ if (!indexes)
+ return 0;
+
+ /*
+ * First element in the array is the number of Bars
+ * returned. Search through the list to find the matching
+ * bar
+ */
+ num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+ if (resno >= num_res)
+ return 0; /* or an errror */
+
+ i = START_OF_ENTRIES + NEXT_ENTRY * resno;
+ switch (value) {
+ case BAR_ADDRS:
+ ret = of_read_number(&indexes[i], 2);
+ break;
+ case APERTURE_SIZE:
+ ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+ break;
+ case WDW_SIZE:
+ ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+ break;
+ }
+
+ return ret;
+}
+
+void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
+{
+ struct resource *res;
+ resource_size_t base, size;
+ int i, r, num_res;
+
+ num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+ num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
+ for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
+ i += NEXT_ENTRY, r++) {
+ res = &dev->resource[r + PCI_IOV_RESOURCES];
+ base = of_read_number(&indexes[i], 2);
+ size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+ res->flags = pci_parse_of_flags(of_read_number
+ (&indexes[i + LOW_INT], 1), 0);
+ res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
+ res->name = pci_name(dev);
+ res->start = base;
+ res->end = base + size - 1;
+ }
+}
+
+void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
+{
+ struct resource *res, *root, *conflict;
+ resource_size_t base, size;
+ int i, r, num_res;
+
+ /*
+ * First element in the array is the number of Bars
+ * returned. Search through the list to find the matching
+ * bars assign them from firmware into resources structure.
+ */
+ num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+ for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
+ i += NEXT_ENTRY, r++) {
+ res = &dev->resource[r + PCI_IOV_RESOURCES];
+ base = of_read_number(&indexes[i], 2);
+ size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+ res->name = pci_name(dev);
+ res->start = base;
+ res->end = base + size - 1;
+ root = &iomem_resource;
+ dev_dbg(&dev->dev,
+ "pSeries IOV BAR %d: trying firmware assignment %pR\n",
+ r + PCI_IOV_RESOURCES, res);
+ conflict = request_resource_conflict(root, res);
+ if (conflict) {
+ dev_info(&dev->dev,
+ "BAR %d: %pR conflicts with %s %pR\n",
+ r + PCI_IOV_RESOURCES, res,
+ conflict->name, conflict);
+ res->flags |= IORESOURCE_UNSET;
+ }
+ }
+}
+
+static void pseries_pci_fixup_resources(struct pci_dev *pdev)
+{
+ const int *indexes;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ /*Firmware must support open sriov otherwise dont configure*/
+ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+ if (!indexes)
+ return;
+ /* Assign the addresses from device tree*/
+ of_pci_set_vf_bar_size(pdev, indexes);
+}
+
+static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
+{
+ const int *indexes;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ if (!pdev->is_physfn || pdev->is_added)
+ return;
+ /*Firmware must support open sriov otherwise dont configure*/
+ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+ if (!indexes)
+ return;
+ /* Assign the addresses from device tree*/
+ of_pci_parse_iov_addrs(pdev, indexes);
+}
+
+static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
+ int resno)
+{
+ const __be32 *reg;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ /*Firmware must support open sriov otherwise report regular alignment*/
+ reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
+ if (!reg)
+ return pci_iov_resource_size(pdev, resno);
+
+ if (!pdev->is_physfn)
+ return 0;
+ return pseries_get_iov_fw_value(pdev,
+ resno - PCI_IOV_RESOURCES,
+ APERTURE_SIZE);
+}
+#endif
+
static void __init pSeries_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -525,6 +681,14 @@ static void __init pSeries_setup_arch(void)
vpa_init(boot_cpuid);
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+#ifdef CONFIG_PCI_IOV
+ ppc_md.pcibios_fixup_resources =
+ pseries_pci_fixup_resources;
+ ppc_md.pcibios_fixup_sriov =
+ pseries_pci_fixup_iov_resources;
+ ppc_md.pcibios_iov_resource_alignment =
+ pseries_pci_iov_resource_alignment;
+#endif
} else {
/* No special idle routine */
ppc_md.enable_pmcs = power4_enable_pmcs;
@@ -533,6 +697,12 @@ static void __init pSeries_setup_arch(void)
ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
}
+static void pseries_panic(char *str)
+{
+ panic_flush_kmsg_end();
+ rtas_os_term(str);
+}
+
static int __init pSeries_init_panel(void)
{
/* Manually leave the kernel version on the panel. */
@@ -761,7 +931,7 @@ define_machine(pseries) {
.pcibios_fixup = pSeries_final_fixup,
.restart = rtas_restart,
.halt = rtas_halt,
- .panic = rtas_os_term,
+ .panic = pseries_panic,
.get_boot_time = rtas_get_boot_time,
.get_rtc_time = rtas_get_rtc_time,
.set_rtc_time = rtas_set_rtc_time,
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 89726f07d249..52a021e1f86b 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -214,8 +214,7 @@ static ssize_t show_hibernate(struct device *dev,
return sprintf(buf, "%d\n", KERN_DT_UPDATE);
}
-static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO,
- show_hibernate, store_hibernate);
+static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate);
static struct bus_type suspend_subsys = {
.name = "power",
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 0baba21404dc..9861407d644a 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -32,7 +32,6 @@ mv64x60-$(CONFIG_PCI) += mv64x60_pci.o
obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \
mv64x60_udbg.o
obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
-obj-$(CONFIG_AXON_RAM) += axonram.o
obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o
obj-$(CONFIG_PPC_I8259) += i8259.o
@@ -43,7 +42,8 @@ obj-$(CONFIG_OF_RTC) += of_rtc.o
obj-$(CONFIG_CPM) += cpm_common.o
obj-$(CONFIG_CPM1) += cpm1.o
-obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o
+obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o cpm_gpio.o
+obj-$(CONFIG_8xx_GPIO) += cpm_gpio.o
obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o
obj-$(CONFIG_PPC_DCR) += dcr.o
obj-$(CONFIG_UCODE_PATCH) += micropatch.o
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
deleted file mode 100644
index 1b307c80b401..000000000000
--- a/arch/powerpc/sysdev/axonram.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2006
- *
- * Author: Maxim Shchetynin <maxim@de.ibm.com>
- *
- * Axon DDR2 device driver.
- * It registers one block device per Axon's DDR2 memory bank found on a system.
- * Block devices are called axonram?, their major and minor numbers are
- * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/dax.h>
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/irq.h>
-#include <linux/irqreturn.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mod_devicetable.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/pfn_t.h>
-#include <linux/uio.h>
-
-#include <asm/page.h>
-#include <asm/prom.h>
-
-#define AXON_RAM_MODULE_NAME "axonram"
-#define AXON_RAM_DEVICE_NAME "axonram"
-#define AXON_RAM_MINORS_PER_DISK 16
-#define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT
-#define AXON_RAM_BLOCK_SIZE 1 << AXON_RAM_BLOCK_SHIFT
-#define AXON_RAM_SECTOR_SHIFT 9
-#define AXON_RAM_SECTOR_SIZE 1 << AXON_RAM_SECTOR_SHIFT
-#define AXON_RAM_IRQ_FLAGS IRQF_SHARED | IRQF_TRIGGER_RISING
-
-static int azfs_major, azfs_minor;
-
-struct axon_ram_bank {
- struct platform_device *device;
- struct gendisk *disk;
- struct dax_device *dax_dev;
- unsigned int irq_id;
- unsigned long ph_addr;
- unsigned long io_addr;
- unsigned long size;
- unsigned long ecc_counter;
-};
-
-static ssize_t
-axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct platform_device *device = to_platform_device(dev);
- struct axon_ram_bank *bank = device->dev.platform_data;
-
- BUG_ON(!bank);
-
- return sprintf(buf, "%ld\n", bank->ecc_counter);
-}
-
-static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
-
-/**
- * axon_ram_irq_handler - interrupt handler for Axon RAM ECC
- * @irq: interrupt ID
- * @dev: pointer to of_device
- */
-static irqreturn_t
-axon_ram_irq_handler(int irq, void *dev)
-{
- struct platform_device *device = dev;
- struct axon_ram_bank *bank = device->dev.platform_data;
-
- BUG_ON(!bank);
-
- dev_err(&device->dev, "Correctable memory error occurred\n");
- bank->ecc_counter++;
- return IRQ_HANDLED;
-}
-
-/**
- * axon_ram_make_request - make_request() method for block device
- * @queue, @bio: see blk_queue_make_request()
- */
-static blk_qc_t
-axon_ram_make_request(struct request_queue *queue, struct bio *bio)
-{
- struct axon_ram_bank *bank = bio->bi_disk->private_data;
- unsigned long phys_mem, phys_end;
- void *user_mem;
- struct bio_vec vec;
- unsigned int transfered;
- struct bvec_iter iter;
-
- phys_mem = bank->io_addr + (bio->bi_iter.bi_sector <<
- AXON_RAM_SECTOR_SHIFT);
- phys_end = bank->io_addr + bank->size;
- transfered = 0;
- bio_for_each_segment(vec, bio, iter) {
- if (unlikely(phys_mem + vec.bv_len > phys_end)) {
- bio_io_error(bio);
- return BLK_QC_T_NONE;
- }
-
- user_mem = page_address(vec.bv_page) + vec.bv_offset;
- if (bio_data_dir(bio) == READ)
- memcpy(user_mem, (void *) phys_mem, vec.bv_len);
- else
- memcpy((void *) phys_mem, user_mem, vec.bv_len);
-
- phys_mem += vec.bv_len;
- transfered += vec.bv_len;
- }
- bio_endio(bio);
- return BLK_QC_T_NONE;
-}
-
-static const struct block_device_operations axon_ram_devops = {
- .owner = THIS_MODULE,
-};
-
-static long
-__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages,
- void **kaddr, pfn_t *pfn)
-{
- resource_size_t offset = pgoff * PAGE_SIZE;
-
- *kaddr = (void *) bank->io_addr + offset;
- *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
- return (bank->size - offset) / PAGE_SIZE;
-}
-
-static long
-axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
- void **kaddr, pfn_t *pfn)
-{
- struct axon_ram_bank *bank = dax_get_private(dax_dev);
-
- return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
-}
-
-static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
- void *addr, size_t bytes, struct iov_iter *i)
-{
- return copy_from_iter(addr, bytes, i);
-}
-
-static const struct dax_operations axon_ram_dax_ops = {
- .direct_access = axon_ram_dax_direct_access,
- .copy_from_iter = axon_ram_copy_from_iter,
-};
-
-/**
- * axon_ram_probe - probe() method for platform driver
- * @device: see platform_driver method
- */
-static int axon_ram_probe(struct platform_device *device)
-{
- static int axon_ram_bank_id = -1;
- struct axon_ram_bank *bank;
- struct resource resource;
- int rc;
-
- axon_ram_bank_id++;
-
- dev_info(&device->dev, "Found memory controller on %pOF\n",
- device->dev.of_node);
-
- bank = kzalloc(sizeof(*bank), GFP_KERNEL);
- if (!bank)
- return -ENOMEM;
-
- device->dev.platform_data = bank;
-
- bank->device = device;
-
- if (of_address_to_resource(device->dev.of_node, 0, &resource) != 0) {
- dev_err(&device->dev, "Cannot access device tree\n");
- rc = -EFAULT;
- goto failed;
- }
-
- bank->size = resource_size(&resource);
-
- if (bank->size == 0) {
- dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
- AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
- rc = -ENODEV;
- goto failed;
- }
-
- dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
- AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
-
- bank->ph_addr = resource.start;
- bank->io_addr = (unsigned long) ioremap_prot(
- bank->ph_addr, bank->size, _PAGE_NO_CACHE);
- if (bank->io_addr == 0) {
- dev_err(&device->dev, "ioremap() failed\n");
- rc = -EFAULT;
- goto failed;
- }
-
- bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
- if (bank->disk == NULL) {
- dev_err(&device->dev, "Cannot register disk\n");
- rc = -EFAULT;
- goto failed;
- }
-
-
- bank->disk->major = azfs_major;
- bank->disk->first_minor = azfs_minor;
- bank->disk->fops = &axon_ram_devops;
- bank->disk->private_data = bank;
-
- sprintf(bank->disk->disk_name, "%s%d",
- AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
-
- bank->dax_dev = alloc_dax(bank, bank->disk->disk_name,
- &axon_ram_dax_ops);
- if (!bank->dax_dev) {
- rc = -ENOMEM;
- goto failed;
- }
-
- bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
- if (bank->disk->queue == NULL) {
- dev_err(&device->dev, "Cannot register disk queue\n");
- rc = -EFAULT;
- goto failed;
- }
-
- set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
- blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
- blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
- device_add_disk(&device->dev, bank->disk);
-
- bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
- if (!bank->irq_id) {
- dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
- rc = -EFAULT;
- goto failed;
- }
-
- rc = request_irq(bank->irq_id, axon_ram_irq_handler,
- AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
- if (rc != 0) {
- dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
- bank->irq_id = 0;
- rc = -EFAULT;
- goto failed;
- }
-
- rc = device_create_file(&device->dev, &dev_attr_ecc);
- if (rc != 0) {
- dev_err(&device->dev, "Cannot create sysfs file\n");
- rc = -EFAULT;
- goto failed;
- }
-
- azfs_minor += bank->disk->minors;
-
- return 0;
-
-failed:
- if (bank->irq_id)
- free_irq(bank->irq_id, device);
- if (bank->disk != NULL) {
- if (bank->disk->major > 0)
- unregister_blkdev(bank->disk->major,
- bank->disk->disk_name);
- if (bank->disk->flags & GENHD_FL_UP)
- del_gendisk(bank->disk);
- put_disk(bank->disk);
- }
- kill_dax(bank->dax_dev);
- put_dax(bank->dax_dev);
- device->dev.platform_data = NULL;
- if (bank->io_addr != 0)
- iounmap((void __iomem *) bank->io_addr);
- kfree(bank);
- return rc;
-}
-
-/**
- * axon_ram_remove - remove() method for platform driver
- * @device: see of_platform_driver method
- */
-static int
-axon_ram_remove(struct platform_device *device)
-{
- struct axon_ram_bank *bank = device->dev.platform_data;
-
- BUG_ON(!bank || !bank->disk);
-
- device_remove_file(&device->dev, &dev_attr_ecc);
- free_irq(bank->irq_id, device);
- kill_dax(bank->dax_dev);
- put_dax(bank->dax_dev);
- del_gendisk(bank->disk);
- put_disk(bank->disk);
- iounmap((void __iomem *) bank->io_addr);
- kfree(bank);
-
- return 0;
-}
-
-static const struct of_device_id axon_ram_device_id[] = {
- {
- .type = "dma-memory"
- },
- {}
-};
-MODULE_DEVICE_TABLE(of, axon_ram_device_id);
-
-static struct platform_driver axon_ram_driver = {
- .probe = axon_ram_probe,
- .remove = axon_ram_remove,
- .driver = {
- .name = AXON_RAM_MODULE_NAME,
- .of_match_table = axon_ram_device_id,
- },
-};
-
-/**
- * axon_ram_init
- */
-static int __init
-axon_ram_init(void)
-{
- azfs_major = register_blkdev(azfs_major, AXON_RAM_DEVICE_NAME);
- if (azfs_major < 0) {
- printk(KERN_ERR "%s cannot become block device major number\n",
- AXON_RAM_MODULE_NAME);
- return -EFAULT;
- }
- azfs_minor = 0;
-
- return platform_driver_register(&axon_ram_driver);
-}
-
-/**
- * axon_ram_exit
- */
-static void __exit
-axon_ram_exit(void)
-{
- platform_driver_unregister(&axon_ram_driver);
- unregister_blkdev(azfs_major, AXON_RAM_DEVICE_NAME);
-}
-
-module_init(axon_ram_init);
-module_exit(axon_ram_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>");
-MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index c6f154b602fb..5240d3a74a10 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -629,8 +629,9 @@ static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm1_gpiochip_add16(struct device_node *np)
+int cpm1_gpiochip_add16(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm1_gpio16_chip *cpm1_gc;
struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
@@ -660,6 +661,8 @@ int cpm1_gpiochip_add16(struct device_node *np)
gc->get = cpm1_gpio16_get;
gc->set = cpm1_gpio16_set;
gc->to_irq = cpm1_gpio16_to_irq;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
}
@@ -755,8 +758,9 @@ static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm1_gpiochip_add32(struct device_node *np)
+int cpm1_gpiochip_add32(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm1_gpio32_chip *cpm1_gc;
struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
@@ -776,31 +780,10 @@ int cpm1_gpiochip_add32(struct device_node *np)
gc->direction_output = cpm1_gpio32_dir_out;
gc->get = cpm1_gpio32_get;
gc->set = cpm1_gpio32_set;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
}
-static int cpm_init_par_io(void)
-{
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-a")
- cpm1_gpiochip_add16(np);
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-b")
- cpm1_gpiochip_add32(np);
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-c")
- cpm1_gpiochip_add16(np);
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-d")
- cpm1_gpiochip_add16(np);
-
- /* Port E uses CPM2 layout */
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-e")
- cpm2_gpiochip_add32(np);
- return 0;
-}
-arch_initcall(cpm_init_par_io);
-
#endif /* CONFIG_8xx_GPIO */
diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
index f78ff841652c..07718b9a2c99 100644
--- a/arch/powerpc/sysdev/cpm2.c
+++ b/arch/powerpc/sysdev/cpm2.c
@@ -354,14 +354,3 @@ void cpm2_set_pin(int port, int pin, int flags)
else
clrbits32(&iop[port].odr, pin);
}
-
-static int cpm_init_par_io(void)
-{
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, "fsl,cpm2-pario-bank")
- cpm2_gpiochip_add32(np);
- return 0;
-}
-arch_initcall(cpm_init_par_io);
-
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 51bf749a4f3a..b74508175b67 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -190,8 +190,9 @@ static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm2_gpiochip_add32(struct device_node *np)
+int cpm2_gpiochip_add32(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm2_gpio32_chip *cpm2_gc;
struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
@@ -211,6 +212,8 @@ int cpm2_gpiochip_add32(struct device_node *np)
gc->direction_output = cpm2_gpio32_dir_out;
gc->get = cpm2_gpio32_get;
gc->set = cpm2_gpio32_set;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
return of_mm_gpiochip_add_data(np, mm_gc, cpm2_gc);
}
diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
new file mode 100644
index 000000000000..0badc90be666
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm_gpio.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common CPM GPIO wrapper for the CPM GPIO ports
+ *
+ * Author: Christophe Leroy <christophe.leroy@c-s.fr>
+ *
+ * Copyright 2017 CS Systemes d'Information.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include <asm/cpm.h>
+#ifdef CONFIG_8xx_GPIO
+#include <asm/cpm1.h>
+#endif
+
+static int cpm_gpio_probe(struct platform_device *ofdev)
+{
+ struct device *dev = &ofdev->dev;
+ int (*gp_add)(struct device *dev) = of_device_get_match_data(dev);
+
+ if (!gp_add)
+ return -ENODEV;
+
+ return gp_add(dev);
+}
+
+static const struct of_device_id cpm_gpio_match[] = {
+#ifdef CONFIG_8xx_GPIO
+ {
+ .compatible = "fsl,cpm1-pario-bank-a",
+ .data = cpm1_gpiochip_add16,
+ },
+ {
+ .compatible = "fsl,cpm1-pario-bank-b",
+ .data = cpm1_gpiochip_add32,
+ },
+ {
+ .compatible = "fsl,cpm1-pario-bank-c",
+ .data = cpm1_gpiochip_add16,
+ },
+ {
+ .compatible = "fsl,cpm1-pario-bank-d",
+ .data = cpm1_gpiochip_add16,
+ },
+ /* Port E uses CPM2 layout */
+ {
+ .compatible = "fsl,cpm1-pario-bank-e",
+ .data = cpm2_gpiochip_add32,
+ },
+#endif
+ {
+ .compatible = "fsl,cpm2-pario-bank",
+ .data = cpm2_gpiochip_add32,
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, cpm_gpio_match);
+
+static struct platform_driver cpm_gpio_driver = {
+ .probe = cpm_gpio_probe,
+ .driver = {
+ .name = "cpm-gpio",
+ .owner = THIS_MODULE,
+ .of_match_table = cpm_gpio_match,
+ },
+};
+
+static int __init cpm_gpio_init(void)
+{
+ return platform_driver_register(&cpm_gpio_driver);
+}
+arch_initcall(cpm_gpio_init);
+
+MODULE_AUTHOR("Christophe Leroy <christophe.leroy@c-s.fr>");
+MODULE_DESCRIPTION("Driver for CPM GPIO");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cpm-gpio");
diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
index 1707bf04dec6..94278e8af192 100644
--- a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
+++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
@@ -56,17 +56,16 @@ static ssize_t fsl_timer_wakeup_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct timeval interval;
- int val = 0;
+ time64_t interval = 0;
mutex_lock(&sysfs_lock);
if (fsl_wakeup->timer) {
mpic_get_remain_time(fsl_wakeup->timer, &interval);
- val = interval.tv_sec + 1;
+ interval++;
}
mutex_unlock(&sysfs_lock);
- return sprintf(buf, "%d\n", val);
+ return sprintf(buf, "%lld\n", interval);
}
static ssize_t fsl_timer_wakeup_store(struct device *dev,
@@ -74,11 +73,10 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
const char *buf,
size_t count)
{
- struct timeval interval;
+ time64_t interval;
int ret;
- interval.tv_usec = 0;
- if (kstrtol(buf, 0, &interval.tv_sec))
+ if (kstrtoll(buf, 0, &interval))
return -EINVAL;
mutex_lock(&sysfs_lock);
@@ -89,13 +87,13 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
fsl_wakeup->timer = NULL;
}
- if (!interval.tv_sec) {
+ if (!interval) {
mutex_unlock(&sysfs_lock);
return count;
}
fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq,
- fsl_wakeup, &interval);
+ fsl_wakeup, interval);
if (!fsl_wakeup->timer) {
mutex_unlock(&sysfs_lock);
return -EINVAL;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 61e07c78d64f..918be816b097 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -448,7 +448,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
#endif
/* adjusting outbound windows could reclaim space in mem map */
if (paddr_hi < 0xffffffffull)
- pr_warning("%pOF: WARNING: Outbound window cfg leaves "
+ pr_warn("%pOF: WARNING: Outbound window cfg leaves "
"gaps in memory map. Adjusting the memory map "
"could reduce unnecessary bounce buffering.\n",
hose->dn);
@@ -531,7 +531,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
dev = pdev->dev.of_node;
if (!of_device_is_available(dev)) {
- pr_warning("%pOF: disabled\n", dev);
+ pr_warn("%pOF: disabled\n", dev);
return -ENODEV;
}
@@ -808,8 +808,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev)
is_mpc83xx_pci = 1;
if (!of_device_is_available(dev)) {
- pr_warning("%pOF: disabled by the firmware.\n",
- dev);
+ pr_warn("%pOF: disabled by the firmware.\n",
+ dev);
return -ENODEV;
}
pr_debug("Adding PCI host bridge %pOF\n", dev);
@@ -1070,7 +1070,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
if (is_in_pci_mem_space(addr)) {
if (user_mode(regs)) {
pagefault_disable();
- ret = get_user(regs->nip, &inst);
+ ret = get_user(inst, (__u32 __user *)regs->nip);
pagefault_enable();
} else {
ret = probe_kernel_address((void *)regs->nip, inst);
@@ -1304,10 +1304,8 @@ static int add_err_dev(struct platform_device *pdev)
pdev->resource,
pdev->num_resources,
&pd, sizeof(pd));
- if (IS_ERR(errdev))
- return PTR_ERR(errdev);
- return 0;
+ return PTR_ERR_OR_ZERO(errdev);
}
static int fsl_pci_probe(struct platform_device *pdev)
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index ead3e2549ebf..73067805300a 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1008,9 +1008,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
if (hw == mpic->spurious_vec)
return -EINVAL;
if (mpic->protected && test_bit(hw, mpic->protected)) {
- pr_warning("mpic: Mapping of source 0x%x failed, "
- "source protected by firmware !\n",\
- (unsigned int)hw);
+ pr_warn("mpic: Mapping of source 0x%x failed, source protected by firmware !\n",
+ (unsigned int)hw);
return -EPERM;
}
@@ -1040,9 +1039,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
return 0;
if (hw >= mpic->num_sources) {
- pr_warning("mpic: Mapping of source 0x%x failed, "
- "source out of range !\n",\
- (unsigned int)hw);
+ pr_warn("mpic: Mapping of source 0x%x failed, source out of range !\n",
+ (unsigned int)hw);
return -EINVAL;
}
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index a418579591be..87e7c42777a8 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -47,9 +47,6 @@
#define MAX_TICKS_CASCADE (~0U)
#define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num))
-/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */
-#define ONE_SECOND 1000000
-
struct timer_regs {
u32 gtccr;
u32 res0[3];
@@ -90,51 +87,23 @@ static struct cascade_priv cascade_timer[] = {
static LIST_HEAD(timer_group_list);
static void convert_ticks_to_time(struct timer_group_priv *priv,
- const u64 ticks, struct timeval *time)
+ const u64 ticks, time64_t *time)
{
- u64 tmp_sec;
-
- time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq);
- tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
-
- time->tv_usec = 0;
-
- if (tmp_sec <= ticks)
- time->tv_usec = (__kernel_suseconds_t)
- div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq);
-
- return;
+ *time = (u64)div_u64(ticks, priv->timerfreq);
}
/* the time set by the user is converted to "ticks" */
static int convert_time_to_ticks(struct timer_group_priv *priv,
- const struct timeval *time, u64 *ticks)
+ time64_t time, u64 *ticks)
{
u64 max_value; /* prevent u64 overflow */
- u64 tmp = 0;
-
- u64 tmp_sec;
- u64 tmp_ms;
- u64 tmp_us;
max_value = div_u64(ULLONG_MAX, priv->timerfreq);
- if (time->tv_sec > max_value ||
- (time->tv_sec == max_value && time->tv_usec > 0))
+ if (time > max_value)
return -EINVAL;
- tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
- tmp += tmp_sec;
-
- tmp_ms = time->tv_usec / 1000;
- tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000);
- tmp += tmp_ms;
-
- tmp_us = time->tv_usec % 1000;
- tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000);
- tmp += tmp_us;
-
- *ticks = tmp;
+ *ticks = (u64)time * (u64)priv->timerfreq;
return 0;
}
@@ -223,7 +192,7 @@ static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv,
return allocated_timer;
}
-static struct mpic_timer *get_timer(const struct timeval *time)
+static struct mpic_timer *get_timer(time64_t time)
{
struct timer_group_priv *priv;
struct mpic_timer *timer;
@@ -277,7 +246,7 @@ static struct mpic_timer *get_timer(const struct timeval *time)
* @handle: the timer to be started.
*
* It will do ->fn(->dev) callback from the hardware interrupt at
- * the ->timeval point in the future.
+ * the 'time64_t' point in the future.
*/
void mpic_start_timer(struct mpic_timer *handle)
{
@@ -319,7 +288,7 @@ EXPORT_SYMBOL(mpic_stop_timer);
*
* Query timer remaining time.
*/
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time)
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time)
{
struct timer_group_priv *priv = container_of(handle,
struct timer_group_priv, timer[handle->num]);
@@ -391,7 +360,7 @@ EXPORT_SYMBOL(mpic_free_timer);
* else "handle" on success.
*/
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time)
+ time64_t time)
{
struct mpic_timer *allocated_timer;
int ret;
@@ -399,11 +368,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
if (list_empty(&timer_group_list))
return NULL;
- if (!(time->tv_sec + time->tv_usec) ||
- time->tv_sec < 0 || time->tv_usec < 0)
- return NULL;
-
- if (time->tv_usec > ONE_SECOND)
+ if (time < 0)
return NULL;
allocated_timer = get_timer(time);
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index d52b3b81e05f..50c411b1761e 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -73,7 +73,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
.attr = {
.name = "hs_reg",
- .mode = S_IRUGO | S_IWUSR,
+ .mode = 0644,
},
.size = MV64X60_VAL_LEN_MAX,
.read = mv64x60_hs_reg_read,
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 2bfb9968d562..1459f4e8b698 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -241,18 +241,16 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
cpu, hw_id);
if (!request_mem_region(addr, size, rname)) {
- pr_warning("icp_native: Could not reserve ICP MMIO"
- " for CPU %d, interrupt server #0x%x\n",
- cpu, hw_id);
+ pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
+ cpu, hw_id);
return -EBUSY;
}
icp_native_regs[cpu] = ioremap(addr, size);
kvmppc_set_xics_phys(cpu, addr);
if (!icp_native_regs[cpu]) {
- pr_warning("icp_native: Failed ioremap for CPU %d, "
- "interrupt server #0x%x, addr %#lx\n",
- cpu, hw_id, addr);
+ pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n",
+ cpu, hw_id, addr);
release_mem_region(addr, size);
return -ENOMEM;
}
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 1c6bf4b66f56..f85f916ba432 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -131,8 +131,8 @@ static int ics_opal_set_affinity(struct irq_data *d,
wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
if (wanted_server < 0) {
- pr_warning("%s: No online cpus in the mask %*pb for irq %d\n",
- __func__, cpumask_pr_args(cpumask), d->irq);
+ pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+ __func__, cpumask_pr_args(cpumask), d->irq);
return -1;
}
server = ics_opal_mangle_server(wanted_server);
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 42e0c56ff81c..6aabc74688a6 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -141,8 +141,8 @@ static int ics_rtas_set_affinity(struct irq_data *d,
irq_server = xics_get_irq_server(d->irq, cpumask, 1);
if (irq_server == -1) {
- pr_warning("%s: No online cpus in the mask %*pb for irq %d\n",
- __func__, cpumask_pr_args(cpumask), d->irq);
+ pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+ __func__, cpumask_pr_args(cpumask), d->irq);
return -1;
}
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index ffe138b8b9dc..77e864d5506d 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -243,8 +243,8 @@ void xics_migrate_irqs_away(void)
/* This is expected during cpu offline. */
if (cpu_online(cpu))
- pr_warning("IRQ %u affinity broken off cpu %u\n",
- virq, cpu);
+ pr_warn("IRQ %u affinity broken off cpu %u\n",
+ virq, cpu);
/* Reset affinity to all cpus */
raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -466,7 +466,7 @@ void __init xics_init(void)
rc = icp_opal_init();
}
if (rc < 0) {
- pr_warning("XICS: Cannot find a Presentation Controller !\n");
+ pr_warn("XICS: Cannot find a Presentation Controller !\n");
return;
}
@@ -481,7 +481,7 @@ void __init xics_init(void)
if (rc < 0)
rc = ics_opal_init();
if (rc < 0)
- pr_warning("XICS: Cannot find a Source Controller !\n");
+ pr_warn("XICS: Cannot find a Source Controller !\n");
/* Initialize common bits */
xics_get_server_size();
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a3b8d7d1316e..40c06110821c 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
* EOI the source if it hasn't been disabled and hasn't
* been passed-through to a KVM guest
*/
- if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+ !(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
/*
@@ -1269,11 +1270,6 @@ static void xive_setup_cpu(void)
{
struct xive_cpu *xc = __this_cpu_read(xive_cpu);
- /* Debug: Dump the TM state */
- pr_devel("CPU %d [HW 0x%02x] VT=%02x\n",
- smp_processor_id(), hard_smp_processor_id(),
- in_8(xive_tima + xive_tima_offset + TM_WORD2));
-
/* The backend might have additional things to do */
if (xive_ops->setup_cpu)
xive_ops->setup_cpu(smp_processor_id(), xc);
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index 31db8c072acd..9deea5ee13f6 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -93,10 +93,6 @@ lookup_powerpc (unsigned long insn, ppc_cpu_t dialect)
{
const struct powerpc_opcode *opcode;
const struct powerpc_opcode *opcode_end;
- unsigned long op;
-
- /* Get the major opcode of the instruction. */
- op = PPC_OP (insn);
opcode_end = powerpc_opcodes + powerpc_num_opcodes;
/* Find the first match in the opcode table for this major opcode. */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0ddc7ac6c5f1..82e1a3ee6e0f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1623,7 +1623,7 @@ static void excprint(struct pt_regs *fp)
printf(" current = 0x%lx\n", current);
#ifdef CONFIG_PPC64
printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n",
- local_paca, local_paca->soft_enabled, local_paca->irq_happened);
+ local_paca, local_paca->irq_soft_mask, local_paca->irq_happened);
#endif
if (current) {
printf(" pid = %ld, comm = %s\n",
@@ -2377,8 +2377,6 @@ static void dump_one_paca(int cpu)
printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]);
DUMP(p, rfi_flush_fallback_area, "px");
- DUMP(p, l1d_flush_congruence, "llx");
- DUMP(p, l1d_flush_sets, "llx");
#endif
DUMP(p, dscr_default, "llx");
#ifdef CONFIG_PPC_BOOK3E
@@ -2395,7 +2393,7 @@ static void dump_one_paca(int cpu)
DUMP(p, stab_rr, "lx");
DUMP(p, saved_r1, "lx");
DUMP(p, trap_save, "x");
- DUMP(p, soft_enabled, "x");
+ DUMP(p, irq_soft_mask, "x");
DUMP(p, irq_happened, "x");
DUMP(p, io_sync, "x");
DUMP(p, irq_work_pending, "x");
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72d5149bcaa1..b0771ceabb4b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -116,6 +116,7 @@ config X86
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 1e3883e45687..74f6eee15179 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -21,6 +21,7 @@
#include <linux/export.h>
#include <linux/context_tracking.h>
#include <linux/user-return-notifier.h>
+#include <linux/nospec.h>
#include <linux/uprobes.h>
#include <linux/livepatch.h>
#include <linux/syscalls.h>
@@ -206,7 +207,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
* special case only applies after poking regs and before the
* very next return to user mode.
*/
- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
user_enter_irqoff();
@@ -282,7 +283,8 @@ __visible void do_syscall_64(struct pt_regs *regs)
* regs->orig_ax, which changes the behavior of some syscalls.
*/
if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
- regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
+ regs->ax = sys_call_table[nr](
regs->di, regs->si, regs->dx,
regs->r10, regs->r8, regs->r9);
}
@@ -304,7 +306,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
unsigned int nr = (unsigned int)regs->orig_ax;
#ifdef CONFIG_IA32_EMULATION
- current->thread.status |= TS_COMPAT;
+ ti->status |= TS_COMPAT;
#endif
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
@@ -318,6 +320,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
}
if (likely(nr < IA32_NR_syscalls)) {
+ nr = array_index_nospec(nr, IA32_NR_syscalls);
/*
* It's possible that a 32-bit syscall implementation
* takes a 64-bit parameter but nonetheless assumes that
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a83570495162..c752abe89d80 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -236,91 +236,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
- UNWIND_HINT_REGS extra=0
-
- TRACE_IRQS_OFF
-
- /*
- * If we need to do entry work or if we guess we'll need to do
- * exit work, go straight to the slow path.
- */
- movq PER_CPU_VAR(current_task), %r11
- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
- jnz entry_SYSCALL64_slow_path
-
-entry_SYSCALL_64_fastpath:
- /*
- * Easy case: enable interrupts and issue the syscall. If the syscall
- * needs pt_regs, we'll call a stub that disables interrupts again
- * and jumps to the slow path.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
-#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max, %rax
-#else
- andl $__SYSCALL_MASK, %eax
- cmpl $__NR_syscall_max, %eax
-#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10, %rcx
-
- /*
- * This call instruction is handled specially in stub_ptregs_64.
- * It might end up jumping to the slow path. If it jumps, RAX
- * and all argument registers are clobbered.
- */
-#ifdef CONFIG_RETPOLINE
- movq sys_call_table(, %rax, 8), %rax
- call __x86_indirect_thunk_rax
-#else
- call *sys_call_table(, %rax, 8)
-#endif
-.Lentry_SYSCALL_64_after_fastpath_call:
-
- movq %rax, RAX(%rsp)
-1:
+ pushq %rbx /* pt_regs->rbx */
+ pushq %rbp /* pt_regs->rbp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */
+ UNWIND_HINT_REGS
- /*
- * If we get here, then we know that pt_regs is clean for SYSRET64.
- * If we see that no exit work is required (which we are required
- * to check with IRQs off), then we can go straight to SYSRET64.
- */
- DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- movq PER_CPU_VAR(current_task), %r11
- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
- jnz 1f
-
- LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON /* user mode is traced as IRQs on */
- movq RIP(%rsp), %rcx
- movq EFLAGS(%rsp), %r11
- addq $6*8, %rsp /* skip extra regs -- they were preserved */
- UNWIND_HINT_EMPTY
- jmp .Lpop_c_regs_except_rcx_r11_and_sysret
-1:
- /*
- * The fast path looked good when we started, but something changed
- * along the way and we need to switch to the slow path. Calling
- * raise(3) will trigger this, for example. IRQs are off.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY)
- SAVE_EXTRA_REGS
- movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
- jmp return_from_SYSCALL_64
-
-entry_SYSCALL64_slow_path:
/* IRQs are off. */
- SAVE_EXTRA_REGS
movq %rsp, %rdi
call do_syscall_64 /* returns with IRQs disabled */
-return_from_SYSCALL_64:
TRACE_IRQS_IRETQ /* we're about to change IF */
/*
@@ -393,7 +322,6 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
POP_EXTRA_REGS
-.Lpop_c_regs_except_rcx_r11_and_sysret:
popq %rsi /* skip r11 */
popq %r10
popq %r9
@@ -424,47 +352,6 @@ syscall_return_via_sysret:
USERGS_SYSRET64
END(entry_SYSCALL_64)
-ENTRY(stub_ptregs_64)
- /*
- * Syscalls marked as needing ptregs land here.
- * If we are on the fast path, we need to save the extra regs,
- * which we achieve by trying again on the slow path. If we are on
- * the slow path, the extra regs are already saved.
- *
- * RAX stores a pointer to the C function implementing the syscall.
- * IRQs are on.
- */
- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
- jne 1f
-
- /*
- * Called from fast path -- disable IRQs again, pop return address
- * and jump to slow path
- */
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- popq %rax
- UNWIND_HINT_REGS extra=0
- jmp entry_SYSCALL64_slow_path
-
-1:
- JMP_NOSPEC %rax /* Called from C */
-END(stub_ptregs_64)
-
-.macro ptregs_stub func
-ENTRY(ptregs_\func)
- UNWIND_HINT_FUNC
- leaq \func(%rip), %rax
- jmp stub_ptregs_64
-END(ptregs_\func)
-.endm
-
-/* Instantiate ptregs_stub for each ptregs-using syscall */
-#define __SYSCALL_64_QUAL_(sym)
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
-#include <asm/syscalls_64.h>
-
/*
* %rdi: prev task
* %rsi: next task
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 9c09775e589d..c176d2fab1da 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -7,14 +7,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_64_QUAL_(sym) sym
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
-
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
+#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 7fb336210e1b..30d406146016 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -24,6 +24,34 @@
#define wmb() asm volatile("sfence" ::: "memory")
#endif
+/**
+ * array_index_mask_nospec() - generate a mask that is ~0UL when the
+ * bounds check succeeds and 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ *
+ * Returns:
+ * 0 - (index < size)
+ */
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
+{
+ unsigned long mask;
+
+ asm ("cmp %1,%2; sbb %0,%0;"
+ :"=r" (mask)
+ :"r"(size),"r" (index)
+ :"cc");
+ return mask;
+}
+
+/* Override the default implementation from linux/nospec.h. */
+#define array_index_mask_nospec array_index_mask_nospec
+
+/* Prevent speculative execution past this barrier. */
+#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
+ "lfence", X86_FEATURE_LFENCE_RDTSC)
+
#ifdef CONFIG_X86_PPRO_FENCE
#define dma_rmb() rmb()
#else
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 64c4a30e0d39..e203169931c7 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -137,8 +137,10 @@ enum fixed_addresses {
extern void reserve_top_address(unsigned long reserve);
-#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE)
extern int fixmaps_set;
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 07962f5f6fba..30df295f6d94 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* that some other imaginary CPU is updating continuously with a
* time stamp.
*/
- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
- "lfence", X86_FEATURE_LFENCE_RDTSC);
+ barrier_nospec();
return rdtsc();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d15d471348b8..4d57894635f2 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -150,7 +150,7 @@ extern char __indirect_thunk_end[];
* On VMEXIT we must ensure that no RSB predictions learned in the guest
* can be followed in the host, by overwriting the RSB completely. Both
* retpoline and IBRS mitigations for Spectre v2 need this; only on future
- * CPUs with IBRS_ATT *might* it be avoided.
+ * CPUs with IBRS_ALL *might* it be avoided.
*/
static inline void vmexit_fill_RSB(void)
{
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index ce245b0cdfca..0777e18a1d23 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
*/
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
-#define CPU_ENTRY_AREA_BASE \
- ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
+ & PMD_MASK)
#define PKMAP_BASE \
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index efbde088a718..793bae7e7ce3 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -460,8 +460,6 @@ struct thread_struct {
unsigned short gsindex;
#endif
- u32 status; /* thread synchronous flags */
-
#ifdef CONFIG_X86_64
unsigned long fsbase;
unsigned long gsbase;
@@ -507,6 +505,14 @@ struct thread_struct {
*/
};
+/* Whitelist the FPU state from the task_struct for hardened usercopy. */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = offsetof(struct thread_struct, fpu.state);
+ *size = fpu_kernel_xstate_size;
+}
+
/*
* Thread-synchronous status.
*
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index e3c95e8e61c5..03eedc21246d 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread.status & TS_COMPAT)
+ if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread.status & TS_COMPAT)
+ if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d25a638a2720..a5d9521bb2cb 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -55,6 +55,7 @@ struct task_struct;
struct thread_info {
unsigned long flags; /* low level flags */
+ u32 status; /* thread synchronous flags */
};
#define INIT_THREAD_INFO(tsk) \
@@ -219,7 +220,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#define in_ia32_syscall() true
#else
#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
- current->thread.status & TS_COMPAT)
+ current_thread_info()->status & TS_COMPAT)
#endif
/*
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d33e4a26dc7e..2b8f18ca5874 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -174,6 +174,8 @@ struct tlb_state {
struct mm_struct *loaded_mm;
u16 loaded_mm_asid;
u16 next_asid;
+ /* last user mm's ctx id */
+ u64 last_ctx_id;
/*
* We can be in one of several states:
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 574dff4d2913..aae77eb8491c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -124,6 +124,11 @@ extern int __get_user_bad(void);
#define __uaccess_begin() stac()
#define __uaccess_end() clac()
+#define __uaccess_begin_nospec() \
+({ \
+ stac(); \
+ barrier_nospec(); \
+})
/*
* This is a type: either unsigned long, if the argument fits into
@@ -445,7 +450,7 @@ do { \
({ \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
@@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; };
__uaccess_begin(); \
barrier();
+#define uaccess_try_nospec do { \
+ current->thread.uaccess_err = 0; \
+ __uaccess_begin_nospec(); \
+
#define uaccess_catch(err) \
__uaccess_end(); \
(err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
@@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; };
* get_user_ex(...);
* } get_user_catch(err)
*/
-#define get_user_try uaccess_try
+#define get_user_try uaccess_try_nospec
#define get_user_catch(err) uaccess_catch(err)
#define get_user_ex(x, ptr) do { \
@@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void)
__typeof__(ptr) __uval = (uval); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
switch (size) { \
case 1: \
{ \
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 72950401b223..ba2dc1930630 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
switch (n) {
case 1:
ret = 0;
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u8 *)to, from, ret,
"b", "b", "=q", 1);
__uaccess_end();
return ret;
case 2:
ret = 0;
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u16 *)to, from, ret,
"w", "w", "=r", 2);
__uaccess_end();
return ret;
case 4:
ret = 0;
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u32 *)to, from, ret,
"l", "k", "=r", 4);
__uaccess_end();
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f07ef3c575db..62546b3a398e 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
ret, "b", "b", "=q", 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
ret, "w", "w", "=r", 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
ret, "l", "k", "=r", 4);
__uaccess_end();
return ret;
case 8:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 8);
__uaccess_end();
return ret;
case 10:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 10);
if (likely(!ret))
@@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
__uaccess_end();
return ret;
case 16:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 16);
if (likely(!ret))
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 30571fdaaf6f..a481763a3776 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str)
}
__setup("noreplace-smp", setup_noreplace_smp);
-#ifdef CONFIG_PARAVIRT
-static int __initdata_or_module noreplace_paravirt = 0;
-
-static int __init setup_noreplace_paravirt(char *str)
-{
- noreplace_paravirt = 1;
- return 1;
-}
-__setup("noreplace-paravirt", setup_noreplace_paravirt);
-#endif
-
#define DPRINTK(fmt, args...) \
do { \
if (debug_alternative) \
@@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *p;
char insnbuf[MAX_PATCH_LEN];
- if (noreplace_paravirt)
- return;
-
for (p = start; p < end; p++) {
unsigned int used;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3bfb2b23d79c..71949bf2de5a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -103,7 +103,7 @@ bool retpoline_module_ok(bool has_retpoline)
if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
return true;
- pr_err("System may be vunerable to spectre v2\n");
+ pr_err("System may be vulnerable to spectre v2\n");
spectre_v2_bad_module = true;
return false;
}
@@ -119,13 +119,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
static void __init spec2_print_if_insecure(const char *reason)
{
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- pr_info("%s\n", reason);
+ pr_info("%s selected on command line.\n", reason);
}
static void __init spec2_print_if_secure(const char *reason)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- pr_info("%s\n", reason);
+ pr_info("%s selected on command line.\n", reason);
}
static inline bool retp_compiler(void)
@@ -140,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
return len == arglen && !strncmp(arg, opt, len);
}
+static const struct {
+ const char *option;
+ enum spectre_v2_mitigation_cmd cmd;
+ bool secure;
+} mitigation_options[] = {
+ { "off", SPECTRE_V2_CMD_NONE, false },
+ { "on", SPECTRE_V2_CMD_FORCE, true },
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
+};
+
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
char arg[20];
- int ret;
-
- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
- sizeof(arg));
- if (ret > 0) {
- if (match_option(arg, ret, "off")) {
- goto disable;
- } else if (match_option(arg, ret, "on")) {
- spec2_print_if_secure("force enabled on command line.");
- return SPECTRE_V2_CMD_FORCE;
- } else if (match_option(arg, ret, "retpoline")) {
- spec2_print_if_insecure("retpoline selected on command line.");
- return SPECTRE_V2_CMD_RETPOLINE;
- } else if (match_option(arg, ret, "retpoline,amd")) {
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
- return SPECTRE_V2_CMD_AUTO;
- }
- spec2_print_if_insecure("AMD retpoline selected on command line.");
- return SPECTRE_V2_CMD_RETPOLINE_AMD;
- } else if (match_option(arg, ret, "retpoline,generic")) {
- spec2_print_if_insecure("generic retpoline selected on command line.");
- return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
- } else if (match_option(arg, ret, "auto")) {
+ int ret, i;
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_NONE;
+ else {
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+ sizeof(arg));
+ if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+ if (!match_option(arg, ret, mitigation_options[i].option))
+ continue;
+ cmd = mitigation_options[i].cmd;
+ break;
}
+
+ if (i >= ARRAY_SIZE(mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ !IS_ENABLED(CONFIG_RETPOLINE)) {
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
}
- if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
return SPECTRE_V2_CMD_AUTO;
-disable:
- spec2_print_if_insecure("disabled on command line.");
- return SPECTRE_V2_CMD_NONE;
+ }
+
+ if (mitigation_options[i].secure)
+ spec2_print_if_secure(mitigation_options[i].option);
+ else
+ spec2_print_if_insecure(mitigation_options[i].option);
+
+ return cmd;
}
/* Check for Skylake-like CPUs (for RSB handling) */
@@ -213,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void)
return;
case SPECTRE_V2_CMD_FORCE:
- /* FALLTRHU */
case SPECTRE_V2_CMD_AUTO:
- goto retpoline_auto;
-
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
case SPECTRE_V2_CMD_RETPOLINE_AMD:
if (IS_ENABLED(CONFIG_RETPOLINE))
goto retpoline_amd;
@@ -297,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "Vulnerable\n");
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
}
ssize_t cpu_show_spectre_v2(struct device *dev,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c7c996a692fd..d63f4b5706e4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -750,6 +750,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
}
}
+static void init_speculation_control(struct cpuinfo_x86 *c)
+{
+ /*
+ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+ * and they also have a different bit for STIBP support. Also,
+ * a hypervisor might have set the individual AMD bits even on
+ * Intel CPUs, for finer-grained selection of what's available.
+ *
+ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
+ * features, which are visible in /proc/cpuinfo and used by the
+ * kernel. So set those accordingly from the Intel bits.
+ */
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+ }
+ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -844,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
init_scattered_cpuid_features(c);
+ init_speculation_control(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -879,7 +900,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
-static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
+static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
@@ -892,7 +913,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
{}
};
-static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{ X86_VENDOR_AMD },
{}
};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 6936d14d4c77..319bf989fad1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -175,28 +175,17 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
c->microcode = intel_get_microcode_revision();
- /*
- * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
- * and they also have a different bit for STIBP support. Also,
- * a hypervisor might have set the individual AMD bits even on
- * Intel CPUs, for finer-grained selection of what's available.
- */
- if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
- set_cpu_cap(c, X86_FEATURE_IBRS);
- set_cpu_cap(c, X86_FEATURE_IBPB);
- }
- if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
- set_cpu_cap(c, X86_FEATURE_STIBP);
-
/* Now if any of them are set, check the blacklist and clear the lot */
- if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+ cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
+ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
- clear_cpu_cap(c, X86_FEATURE_IBRS);
- clear_cpu_cap(c, X86_FEATURE_IBPB);
- clear_cpu_cap(c, X86_FEATURE_STIBP);
- clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
- clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_IBRS);
+ setup_clear_cpu_cap(X86_FEATURE_IBPB);
+ setup_clear_cpu_cap(X86_FEATURE_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
}
/*
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index afbecff161d1..a2d8a3908670 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -109,7 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
struct stack_info stack_info = {0};
unsigned long visit_mask = 0;
int graph_idx = 0;
- bool partial;
+ bool partial = false;
printk("%sCall Trace:\n", log_lvl);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c75466232016..9eb448c7859d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -557,7 +557,7 @@ static void __set_personality_x32(void)
* Pretend to come from a x32 execve.
*/
task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
- current->thread.status &= ~TS_COMPAT;
+ current_thread_info()->status &= ~TS_COMPAT;
#endif
}
@@ -571,7 +571,7 @@ static void __set_personality_ia32(void)
current->personality |= force_personality32;
/* Prepare the first "return" to user space */
task_pt_regs(current)->orig_ax = __NR_ia32_execve;
- current->thread.status |= TS_COMPAT;
+ current_thread_info()->status |= TS_COMPAT;
#endif
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f37d18124648..ed5c4cdf0a34 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
- child->thread.status |= TS_I386_REGS_POKED;
+ child->thread_info.status |= TS_I386_REGS_POKED;
break;
case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 307d3bac5f04..11eda21eb697 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -68,6 +68,9 @@ relocate_kernel:
movq %cr4, %rax
movq %rax, CR4(%r11)
+ /* Save CR4. Required to enable the right paging mode later. */
+ movq %rax, %r13
+
/* zero out flags, and disable interrupts */
pushq $0
popfq
@@ -126,8 +129,13 @@ identity_mapped:
/*
* Set cr4 to a known state:
* - physical address extension enabled
+ * - 5-level paging, if it was enabled before
*/
movl $X86_CR4_PAE, %eax
+ testq $X86_CR4_LA57, %r13
+ jz 1f
+ orl $X86_CR4_LA57, %eax
+1:
movq %rax, %cr4
jmp 1f
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b9e00e8f1c9b..4cdc0b27ec82 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
* than the tracee.
*/
#ifdef CONFIG_IA32_EMULATION
- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0099e10eb045..13f5d4217e4f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void)
#define F(x) bit(X86_FEATURE_##x)
-/* These are scattered features in cpufeatures.h. */
-#define KVM_CPUID_BIT_AVX512_4VNNIW 2
-#define KVM_CPUID_BIT_AVX512_4FMAPS 3
+/* For scattered features from cpufeatures.h; we currently expose none */
#define KF(x) bit(KVM_CPUID_BIT_##x)
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
@@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ /* cpuid 0x80000008.ebx */
+ const u32 kvm_cpuid_8000_0008_ebx_x86_features =
+ F(IBPB) | F(IBRS);
+
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
@@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
- KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
+ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+ F(ARCH_CAPABILITIES);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
- entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
+ cpuid_mask(&entry->edx, CPUID_7_EDX);
} else {
entry->ebx = 0;
entry->ecx = 0;
@@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!g_phys_as)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
- entry->ebx = entry->edx = 0;
+ entry->edx = 0;
+ /* IBRS and IBPB aren't necessarily present in hardware cpuid */
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ entry->ebx |= F(IBPB);
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ entry->ebx |= F(IBRS);
+ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
break;
}
case 0x80000019:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c2cea6651279..9a327d5b6d1f 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
[CPUID_7_ECX] = { 7, 0, CPUID_ECX},
[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
+ [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
};
static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f40d0da1f1d3..4e3c79530526 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -184,6 +184,8 @@ struct vcpu_svm {
u64 gs_base;
} host;
+ u64 spec_ctrl;
+
u32 *msrpm;
ulong nmi_iret_rip;
@@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_CSTAR, .always = true },
{ .index = MSR_SYSCALL_MASK, .always = true },
#endif
+ { .index = MSR_IA32_SPEC_CTRL, .always = false },
+ { .index = MSR_IA32_PRED_CMD, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -529,6 +533,7 @@ struct svm_cpu_data {
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+ struct vmcb *current_vmcb;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index)
return false;
}
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
+{
+ u8 bit_write;
+ unsigned long tmp;
+ u32 offset;
+ u32 *msrpm;
+
+ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
+ to_svm(vcpu)->msrpm;
+
+ offset = svm_msrpm_offset(msr);
+ bit_write = 2 * (msr & 0x0f) + 1;
+ tmp = msrpm[offset];
+
+ BUG_ON(offset == MSR_INVALID);
+
+ return !!test_bit(bit_write, &tmp);
+}
+
static void set_msr_interception(u32 *msrpm, unsigned msr,
int read, int write)
{
@@ -1582,6 +1606,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
+ svm->spec_ctrl = 0;
+
if (!init_event) {
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
MSR_IA32_APICBASE_ENABLE;
@@ -1703,11 +1729,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So do a full IBPB now.
+ */
+ indirect_branch_prediction_barrier();
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int i;
if (unlikely(cpu != vcpu->cpu)) {
@@ -1736,6 +1768,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ if (sd->current_vmcb != svm->vmcb) {
+ sd->current_vmcb = svm->vmcb;
+ indirect_branch_prediction_barrier();
+ }
avic_vcpu_load(vcpu, cpu);
}
@@ -3593,6 +3629,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_VM_CR:
msr_info->data = svm->nested.vm_cr_msr;
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ return 1;
+
+ msr_info->data = svm->spec_ctrl;
+ break;
case MSR_IA32_UCODE_REV:
msr_info->data = 0x01000065;
break;
@@ -3684,6 +3727,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ return 1;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ return 1;
+
+ svm->spec_ctrl = data;
+
+ if (!data)
+ break;
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_svm_vmrun_msrpm.
+ * We update the L1 MSR bit as well since it will end up
+ * touching the MSR anyway now.
+ */
+ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
+ return 1;
+
+ if (data & ~PRED_CMD_IBPB)
+ return 1;
+
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ if (is_guest_mode(vcpu))
+ break;
+ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
+ break;
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -4936,6 +5022,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
local_irq_enable();
+ /*
+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+ * it's non-zero. Since vmentry is serialising on affected CPUs, there
+ * is no need to worry about the conditional branch over the wrmsr
+ * being speculatively taken.
+ */
+ if (svm->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -5028,6 +5123,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /*
+ * We do not use IBRS in the kernel. If this vCPU has used the
+ * SPEC_CTRL MSR it may have left it on; save the value and
+ * turn it off. This is much more efficient than blindly adding
+ * it to the atomic save/restore list. Especially as the former
+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ *
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
+ if (svm->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a8b96dc4cd83..bee4c49f6dd0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -34,6 +34,7 @@
#include <linux/tboot.h>
#include <linux/hrtimer.h>
#include <linux/frame.h>
+#include <linux/nospec.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -111,6 +112,14 @@ static u64 __read_mostly host_xss;
static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+#define MSR_TYPE_RW 3
+
+#define MSR_BITMAP_MODE_X2APIC 1
+#define MSR_BITMAP_MODE_X2APIC_APICV 2
+#define MSR_BITMAP_MODE_LM 4
+
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
@@ -185,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO);
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
struct vmcs {
u32 revision_id;
@@ -210,6 +218,7 @@ struct loaded_vmcs {
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
+ unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -226,7 +235,7 @@ struct shared_msr_entry {
* stored in guest memory specified by VMPTRLD, but is opaque to the guest,
* which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
* More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
* underlying hardware which will be used to run L2.
* This structure is packed to ensure that its layout is identical across
* machines (necessary for live migration).
@@ -409,13 +418,6 @@ struct __packed vmcs12 {
*/
#define VMCS12_SIZE 0x1000
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
- struct list_head list;
- gpa_t vmptr;
- struct loaded_vmcs vmcs02;
-};
-
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -440,15 +442,15 @@ struct nested_vmx {
*/
bool sync_shadow_vmcs;
- /* vmcs02_list cache of VMCSs recently used to run L2 guests */
- struct list_head vmcs02_pool;
- int vmcs02_num;
bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
+
+ struct loaded_vmcs vmcs02;
+
/*
- * Guest pages referred to in vmcs02 with host-physical pointers, so
- * we must keep them pinned while L2 runs.
+ * Guest pages referred to in the vmcs02 with host-physical
+ * pointers, so we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
struct page *virtual_apic_page;
@@ -457,8 +459,6 @@ struct nested_vmx {
bool pi_pending;
u16 posted_intr_nv;
- unsigned long *msr_bitmap;
-
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -581,6 +581,7 @@ struct vcpu_vmx {
struct kvm_vcpu vcpu;
unsigned long host_rsp;
u8 fail;
+ u8 msr_bitmap_mode;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
@@ -592,6 +593,10 @@ struct vcpu_vmx {
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
+
+ u64 arch_capabilities;
+ u64 spec_ctrl;
+
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
u32 secondary_exec_control;
@@ -898,21 +903,18 @@ static const unsigned short vmcs_field_to_offset_table[] = {
static inline short vmcs_field_to_offset(unsigned long field)
{
- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
+ unsigned short offset;
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ BUILD_BUG_ON(size > SHRT_MAX);
+ if (field >= size)
return -ENOENT;
- /*
- * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
- * generic mechanism.
- */
- asm("lfence");
-
- if (vmcs_field_to_offset_table[field] == 0)
+ field = array_index_nospec(field, size);
+ offset = vmcs_field_to_offset_table[field];
+ if (offset == 0)
return -ENOENT;
-
- return vmcs_field_to_offset_table[field];
+ return offset;
}
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
@@ -935,6 +937,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
u16 error_code);
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -954,12 +959,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
enum {
VMX_IO_BITMAP_A,
VMX_IO_BITMAP_B,
- VMX_MSR_BITMAP_LEGACY,
- VMX_MSR_BITMAP_LONGMODE,
- VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
- VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
- VMX_MSR_BITMAP_LEGACY_X2APIC,
- VMX_MSR_BITMAP_LONGMODE_X2APIC,
VMX_VMREAD_BITMAP,
VMX_VMWRITE_BITMAP,
VMX_BITMAP_NR
@@ -969,12 +968,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
-#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
-#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
-#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
-#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
-#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
-#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
@@ -1918,6 +1911,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
vmcs_write32(EXCEPTION_BITMAP, eb);
}
+/*
+ * Check if MSR is intercepted for currently loaded MSR bitmap.
+ */
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+{
+ unsigned long *msr_bitmap;
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return true;
+
+ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
+
+ if (msr <= 0x1fff) {
+ return !!test_bit(msr, msr_bitmap + 0x800 / f);
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+ }
+
+ return true;
+}
+
+/*
+ * Check if MSR is intercepted for L01 MSR bitmap.
+ */
+static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+{
+ unsigned long *msr_bitmap;
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return true;
+
+ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
+
+ if (msr <= 0x1fff) {
+ return !!test_bit(msr, msr_bitmap + 0x800 / f);
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+ }
+
+ return true;
+}
+
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
@@ -2296,6 +2335,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
+ indirect_branch_prediction_barrier();
}
if (!already_loaded) {
@@ -2572,36 +2612,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
vmx->guest_msrs[from] = tmp;
}
-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
-{
- unsigned long *msr_bitmap;
-
- if (is_guest_mode(vcpu))
- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
- else if (cpu_has_secondary_exec_ctrls() &&
- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
- else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
- } else {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
- else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
- }
- } else {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode;
- else
- msr_bitmap = vmx_msr_bitmap_legacy;
- }
-
- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-}
-
/*
* Set up the vmcs to automatically save and restore system
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
@@ -2642,7 +2652,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx->save_nmsrs = save_nmsrs;
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(&vmx->vcpu);
+ vmx_update_msr_bitmap(&vmx->vcpu);
}
/*
@@ -3276,6 +3286,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
msr_info->data = guest_read_tsc(vcpu);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ return 1;
+
+ msr_info->data = to_vmx(vcpu)->spec_ctrl;
+ break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
+ return 1;
+ msr_info->data = to_vmx(vcpu)->arch_capabilities;
+ break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
break;
@@ -3383,6 +3407,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ return 1;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ return 1;
+
+ vmx->spec_ctrl = data;
+
+ if (!data)
+ break;
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_vmx_merge_msr_bitmap. We should not touch the
+ * vmcs02.msr_bitmap here since it gets completely overwritten
+ * in the merging. We update the vmcs01 here for L1 as well
+ * since it will end up touching the MSR anyway now.
+ */
+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+ MSR_IA32_SPEC_CTRL,
+ MSR_TYPE_RW);
+ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ return 1;
+
+ if (data & ~PRED_CMD_IBPB)
+ return 1;
+
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_vmx_merge_msr_bitmap. We should not touch the
+ * vmcs02.msr_bitmap here since it gets completely overwritten
+ * in the merging.
+ */
+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
+ MSR_TYPE_W);
+ break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated)
+ return 1;
+ vmx->arch_capabilities = data;
+ break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -3837,11 +3925,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
return vmcs;
}
-static struct vmcs *alloc_vmcs(void)
-{
- return alloc_vmcs_cpu(raw_smp_processor_id());
-}
-
static void free_vmcs(struct vmcs *vmcs)
{
free_pages((unsigned long)vmcs, vmcs_config.order);
@@ -3857,9 +3940,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
loaded_vmcs_clear(loaded_vmcs);
free_vmcs(loaded_vmcs->vmcs);
loaded_vmcs->vmcs = NULL;
+ if (loaded_vmcs->msr_bitmap)
+ free_page((unsigned long)loaded_vmcs->msr_bitmap);
WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
}
+static struct vmcs *alloc_vmcs(void)
+{
+ return alloc_vmcs_cpu(raw_smp_processor_id());
+}
+
+static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+{
+ loaded_vmcs->vmcs = alloc_vmcs();
+ if (!loaded_vmcs->vmcs)
+ return -ENOMEM;
+
+ loaded_vmcs->shadow_vmcs = NULL;
+ loaded_vmcs_init(loaded_vmcs);
+
+ if (cpu_has_vmx_msr_bitmap()) {
+ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!loaded_vmcs->msr_bitmap)
+ goto out_vmcs;
+ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+ }
+ return 0;
+
+out_vmcs:
+ free_loaded_vmcs(loaded_vmcs);
+ return -ENOMEM;
+}
+
static void free_kvm_area(void)
{
int cpu;
@@ -4918,10 +5030,8 @@ static void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
-#define MSR_TYPE_R 1
-#define MSR_TYPE_W 2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -4955,6 +5065,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
+static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
+{
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+ /*
+ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+ * have the write-low and read-high bitmap offsets the wrong way round.
+ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+ */
+ if (msr <= 0x1fff) {
+ if (type & MSR_TYPE_R)
+ /* read-low */
+ __set_bit(msr, msr_bitmap + 0x000 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-low */
+ __set_bit(msr, msr_bitmap + 0x800 / f);
+
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ if (type & MSR_TYPE_R)
+ /* read-high */
+ __set_bit(msr, msr_bitmap + 0x400 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-high */
+ __set_bit(msr, msr_bitmap + 0xc00 / f);
+
+ }
+}
+
+static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type, bool value)
+{
+ if (value)
+ vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
+ else
+ vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
+}
+
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -5001,30 +5155,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
{
- if (!longmode_only)
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
- msr, MSR_TYPE_R | MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
- msr, MSR_TYPE_R | MSR_TYPE_W);
+ u8 mode = 0;
+
+ if (cpu_has_secondary_exec_ctrls() &&
+ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
+ mode |= MSR_BITMAP_MODE_X2APIC;
+ if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
+ mode |= MSR_BITMAP_MODE_X2APIC_APICV;
+ }
+
+ if (is_long_mode(vcpu))
+ mode |= MSR_BITMAP_MODE_LM;
+
+ return mode;
}
-static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+
+static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
+ u8 mode)
{
- if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
- msr, type);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
- msr, type);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, type);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, type);
+ int msr;
+
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ }
+
+ if (mode & MSR_BITMAP_MODE_X2APIC) {
+ /*
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
+ */
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
+ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
+ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
+ }
}
}
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+ u8 mode = vmx_msr_bitmap_mode(vcpu);
+ u8 changed = mode ^ vmx->msr_bitmap_mode;
+
+ if (!changed)
+ return;
+
+ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
+ !(mode & MSR_BITMAP_MODE_LM));
+
+ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
+ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
+
+ vmx->msr_bitmap_mode = mode;
+}
+
static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
{
return enable_apicv;
@@ -5274,7 +5468,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
}
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
}
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -5461,7 +5655,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
}
if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
+ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
@@ -5539,6 +5733,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
++vmx->nmsrs;
}
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
@@ -5567,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u64 cr0;
vmx->rmode.vm86_active = 0;
+ vmx->spec_ctrl = 0;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -6744,7 +6941,7 @@ void vmx_enable_tdp(void)
static __init int hardware_setup(void)
{
- int r = -ENOMEM, i, msr;
+ int r = -ENOMEM, i;
rdmsrl_safe(MSR_EFER, &host_efer);
@@ -6764,9 +6961,6 @@ static __init int hardware_setup(void)
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
goto out;
@@ -6835,42 +7029,8 @@ static __init int hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 48;
}
- vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-
- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
- vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
- vmx_msr_bitmap_longmode, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_legacy_x2apic,
- vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
- vmx_msr_bitmap_longmode, PAGE_SIZE);
-
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
- for (msr = 0x800; msr <= 0x8ff; msr++) {
- if (msr == 0x839 /* TMCCT */)
- continue;
- vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
- }
-
- /*
- * TPR reads and writes can be virtualized even if virtual interrupt
- * delivery is not in use.
- */
- vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
- vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
-
- /* EOI */
- vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
- /* SELF-IPI */
- vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
-
if (enable_ept)
vmx_enable_tdp();
else
@@ -6974,94 +7134,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
}
/*
- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
- * We could reuse a single VMCS for all the L2 guests, but we also want the
- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
- * allows keeping them loaded on the processor, and in the future will allow
- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
- * every entry if they never change.
- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
- *
- * The following functions allocate and free a vmcs02 in this pool.
- */
-
-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
-{
- struct vmcs02_list *item;
- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
- if (item->vmptr == vmx->nested.current_vmptr) {
- list_move(&item->list, &vmx->nested.vmcs02_pool);
- return &item->vmcs02;
- }
-
- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
- /* Recycle the least recently used VMCS. */
- item = list_last_entry(&vmx->nested.vmcs02_pool,
- struct vmcs02_list, list);
- item->vmptr = vmx->nested.current_vmptr;
- list_move(&item->list, &vmx->nested.vmcs02_pool);
- return &item->vmcs02;
- }
-
- /* Create a new VMCS */
- item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
- if (!item)
- return NULL;
- item->vmcs02.vmcs = alloc_vmcs();
- item->vmcs02.shadow_vmcs = NULL;
- if (!item->vmcs02.vmcs) {
- kfree(item);
- return NULL;
- }
- loaded_vmcs_init(&item->vmcs02);
- item->vmptr = vmx->nested.current_vmptr;
- list_add(&(item->list), &(vmx->nested.vmcs02_pool));
- vmx->nested.vmcs02_num++;
- return &item->vmcs02;
-}
-
-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
-{
- struct vmcs02_list *item;
- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
- if (item->vmptr == vmptr) {
- free_loaded_vmcs(&item->vmcs02);
- list_del(&item->list);
- kfree(item);
- vmx->nested.vmcs02_num--;
- return;
- }
-}
-
-/*
- * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
- * must be &vmx->vmcs01.
- */
-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
-{
- struct vmcs02_list *item, *n;
-
- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
- /*
- * Something will leak if the above WARN triggers. Better than
- * a use-after-free.
- */
- if (vmx->loaded_vmcs == &item->vmcs02)
- continue;
-
- free_loaded_vmcs(&item->vmcs02);
- list_del(&item->list);
- kfree(item);
- vmx->nested.vmcs02_num--;
- }
-}
-
-/*
* The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
* set the success or error code of an emulated VMX instruction, as specified
* by Vol 2B, VMX Instruction Reference, "Conventions".
@@ -7241,13 +7313,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs *shadow_vmcs;
+ int r;
- if (cpu_has_vmx_msr_bitmap()) {
- vmx->nested.msr_bitmap =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx->nested.msr_bitmap)
- goto out_msr_bitmap;
- }
+ r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
+ if (r < 0)
+ goto out_vmcs02;
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
@@ -7264,9 +7334,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
vmx->vmcs01.shadow_vmcs = shadow_vmcs;
}
- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
- vmx->nested.vmcs02_num = 0;
-
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7278,9 +7345,9 @@ out_shadow_vmcs:
kfree(vmx->nested.cached_vmcs12);
out_cached_vmcs12:
- free_page((unsigned long)vmx->nested.msr_bitmap);
+ free_loaded_vmcs(&vmx->nested.vmcs02);
-out_msr_bitmap:
+out_vmcs02:
return -ENOMEM;
}
@@ -7423,10 +7490,6 @@ static void free_nested(struct vcpu_vmx *vmx)
free_vpid(vmx->nested.vpid02);
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
- if (vmx->nested.msr_bitmap) {
- free_page((unsigned long)vmx->nested.msr_bitmap);
- vmx->nested.msr_bitmap = NULL;
- }
if (enable_shadow_vmcs) {
vmx_disable_shadow_vmcs(vmx);
vmcs_clear(vmx->vmcs01.shadow_vmcs);
@@ -7434,7 +7497,7 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->vmcs01.shadow_vmcs = NULL;
}
kfree(vmx->nested.cached_vmcs12);
- /* Unpin physical memory we referred to in current vmcs02 */
+ /* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) {
kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
@@ -7450,7 +7513,7 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.pi_desc = NULL;
}
- nested_free_all_saved_vmcss(vmx);
+ free_loaded_vmcs(&vmx->nested.vmcs02);
}
/* Emulate the VMXOFF instruction */
@@ -7493,8 +7556,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
vmptr + offsetof(struct vmcs12, launch_state),
&zero, sizeof(zero));
- nested_free_vmcs02(vmx, vmptr);
-
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
@@ -8406,10 +8467,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
/*
* The host physical addresses of some pages of guest memory
- * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
- * may write to these pages via their host physical address while
- * L2 is running, bypassing any address-translation-based dirty
- * tracking (e.g. EPT write protection).
+ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+ * Page). The CPU may write to these pages via their host
+ * physical address while L2 is running, bypassing any
+ * address-translation-based dirty tracking (e.g. EPT write
+ * protection).
*
* Mark them dirty on every exit from L2 to prevent them from
* getting out of sync with dirty tracking.
@@ -8943,7 +9005,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
}
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
@@ -9373,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_arm_hv_timer(vcpu);
+ /*
+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+ * it's non-zero. Since vmentry is serialising on affected CPUs, there
+ * is no need to worry about the conditional branch over the wrmsr
+ * being speculatively taken.
+ */
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -9491,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /*
+ * We do not use IBRS in the kernel. If this vCPU has used the
+ * SPEC_CTRL MSR it may have left it on; save the value and
+ * turn it off. This is much more efficient than blindly adding
+ * it to the atomic save/restore list. Especially as the former
+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ *
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -9604,6 +9696,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ unsigned long *msr_bitmap;
int cpu;
if (!vmx)
@@ -9636,13 +9729,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!vmx->guest_msrs)
goto free_pml;
- vmx->loaded_vmcs = &vmx->vmcs01;
- vmx->loaded_vmcs->vmcs = alloc_vmcs();
- vmx->loaded_vmcs->shadow_vmcs = NULL;
- if (!vmx->loaded_vmcs->vmcs)
+ err = alloc_loaded_vmcs(&vmx->vmcs01);
+ if (err < 0)
goto free_msrs;
- loaded_vmcs_init(vmx->loaded_vmcs);
+ msr_bitmap = vmx->vmcs01.msr_bitmap;
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+ vmx->msr_bitmap_mode = 0;
+
+ vmx->loaded_vmcs = &vmx->vmcs01;
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
vmx->vcpu.cpu = cpu;
@@ -10105,10 +10205,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
int msr;
struct page *page;
unsigned long *msr_bitmap_l1;
- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+ /*
+ * pred_cmd & spec_ctrl are trying to verify two things:
+ *
+ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+ * ensures that we do not accidentally generate an L02 MSR bitmap
+ * from the L12 MSR bitmap that is too permissive.
+ * 2. That L1 or L2s have actually used the MSR. This avoids
+ * unnecessarily merging of the bitmap if the MSR is unused. This
+ * works properly because we only update the L01 MSR bitmap lazily.
+ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+ * updated to reflect this when L1 (or its L2s) actually write to
+ * the MSR.
+ */
+ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
- /* This shortcut is ok because we support only x2APIC MSRs so far. */
- if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
+ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+ !pred_cmd && !spec_ctrl)
return false;
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
@@ -10141,6 +10256,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
MSR_TYPE_W);
}
}
+
+ if (spec_ctrl)
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_SPEC_CTRL,
+ MSR_TYPE_R | MSR_TYPE_W);
+
+ if (pred_cmd)
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PRED_CMD,
+ MSR_TYPE_W);
+
kunmap(page);
kvm_release_page_clean(page);
@@ -10682,6 +10810,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+
if (enable_vpid) {
/*
* There is no direct mapping between vpid02 and vpid12, the
@@ -10903,20 +11034,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- struct loaded_vmcs *vmcs02;
u32 msr_entry_idx;
u32 exit_qual;
- vmcs02 = nested_get_current_vmcs02(vmx);
- if (!vmcs02)
- return -ENOMEM;
-
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- vmx_switch_vmcs(vcpu, vmcs02);
+ vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
vmx_segment_cache_clear(vmx);
if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
@@ -11485,7 +11611,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count))
@@ -11534,10 +11660,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
- /* if no vmcs02 cache requested, remove the one we used */
- if (VMCS02_POOL_SIZE == 0)
- nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
-
/* Update any VMCS fields that might have changed while L2 ran */
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c53298dfbf50..f9c5171dad2b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1009,6 +1009,7 @@ static u32 msrs_to_save[] = {
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
};
static unsigned num_msrs_to_save;
@@ -4237,13 +4238,14 @@ set_identity_unlock:
mutex_unlock(&kvm->lock);
break;
case KVM_XEN_HVM_CONFIG: {
+ struct kvm_xen_hvm_config xhc;
r = -EFAULT;
- if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
- sizeof(struct kvm_xen_hvm_config)))
+ if (copy_from_user(&xhc, argp, sizeof(xhc)))
goto out;
r = -EINVAL;
- if (kvm->arch.xen_hvm_config.flags)
+ if (xhc.flags)
goto out;
+ memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
r = 0;
break;
}
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index c97d935a29e8..49b167f73215 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -40,6 +40,8 @@ ENTRY(__get_user_1)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
@@ -54,6 +56,8 @@ ENTRY(__get_user_2)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
2: movzwl -1(%_ASM_AX),%edx
xor %eax,%eax
@@ -68,6 +72,8 @@ ENTRY(__get_user_4)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
3: movl -3(%_ASM_AX),%edx
xor %eax,%eax
@@ -83,6 +89,8 @@ ENTRY(__get_user_8)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movq -7(%_ASM_AX),%rdx
xor %eax,%eax
@@ -94,6 +102,8 @@ ENTRY(__get_user_8)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user_8
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movl -7(%_ASM_AX),%edx
5: movl -3(%_ASM_AX),%ecx
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 1b377f734e64..7add8ba06887 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -331,12 +331,12 @@ do { \
unsigned long __copy_user_ll(void *to, const void *from, unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
n = __copy_user_intel(to, from, n);
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_user_ll);
@@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll);
unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_intel_nocache(to, from, n);
@@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
#else
__copy_user(to, from, n);
#endif
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5bfe61a5e8e3..012d02624848 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,13 +6,14 @@
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-#include <linux/debugfs.h>
/*
* TLB flushing, formerly SMP-only
@@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
} else {
u16 new_asid;
bool need_flush;
+ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
+
+ /*
+ * Avoid user/user BTB poisoning by flushing the branch
+ * predictor when switching between processes. This stops
+ * one process from doing Spectre-v2 attacks on another.
+ *
+ * As an optimization, flush indirect branches only when
+ * switching into processes that disable dumping. This
+ * protects high value processes like gpg, without having
+ * too high performance overhead. IBPB is *expensive*!
+ *
+ * This will not flush branches when switching into kernel
+ * threads. It will also not flush if we switch to idle
+ * thread and back to the same process. It will flush if we
+ * switch to a different non-dumpable process.
+ */
+ if (tsk && tsk->mm &&
+ tsk->mm->context.ctx_id != last_ctx_id &&
+ get_dumpable(tsk->mm) != SUID_DUMP_USER)
+ indirect_branch_prediction_barrier();
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
+ /*
+ * Record last user mm's context id, so we can avoid
+ * flushing branch buffer with IBPB if we switch back
+ * to the same user.
+ */
+ if (next != &init_mm)
+ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}
@@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
+ this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index c35fdb585c68..afc4ed7b1578 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -145,7 +145,7 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir)
#endif
}
-int swsusp_arch_resume(void)
+asmlinkage int swsusp_arch_resume(void)
{
int error;
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f910c514438f..0ef5e5204968 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -174,7 +174,7 @@ out:
return 0;
}
-int swsusp_arch_resume(void)
+asmlinkage int swsusp_arch_resume(void)
{
int error;